]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
Implement fma in soft-fp.
authorJoseph Myers <joseph@codesourcery.com>
Tue, 2 Jul 2013 14:55:32 +0000 (14:55 +0000)
committerJoseph Myers <joseph@codesourcery.com>
Tue, 2 Jul 2013 14:55:32 +0000 (14:55 +0000)
21 files changed:
ChangeLog
ports/ChangeLog.mips
ports/sysdeps/mips/ieee754/s_fma.c [new file with mode: 0644]
ports/sysdeps/mips/ieee754/s_fmaf.c [new file with mode: 0644]
ports/sysdeps/mips/ieee754/s_fmal.c [new file with mode: 0644]
ports/sysdeps/mips/mips32/Implies
ports/sysdeps/mips/mips64/n32/s_fma.c [deleted file]
ports/sysdeps/mips/mips64/n64/s_fma.c [deleted file]
ports/sysdeps/mips/mips64/soft-fp/sfp-machine.h
ports/sysdeps/mips/soft-fp/sfp-machine.h
soft-fp/double.h
soft-fp/extended.h
soft-fp/fmadf4.c [new file with mode: 0644]
soft-fp/fmasf4.c [new file with mode: 0644]
soft-fp/fmatf4.c [new file with mode: 0644]
soft-fp/op-1.h
soft-fp/op-2.h
soft-fp/op-4.h
soft-fp/op-common.h
soft-fp/quad.h
soft-fp/single.h

index 406ca28bd2c20e9e7233cc6e3f47fd06de626428..4a2d19ad03acf227edbeb4feb49ed63590ff1dbc 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,54 @@
+2013-07-02  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #13304]
+       * soft-fp/op-common.h (_FP_FMA): New macro.
+       * soft-fp/op-1.h (_FP_FRAC_HIGHBIT_DW_1): New macro.
+       (_FP_MUL_MEAT_DW_1_imm): Likewise.  Split out of ...
+       (_FP_MUL_MEAT_1_imm): ... here.
+       (_FP_MUL_MEAT_DW_1_wide): New macro.  Split out of ...
+       (_FP_MUL_MEAT_1_wide): ... here.
+       (_FP_MUL_MEAT_DW_1_hard): Likewise.  Split out of ...
+       (_FP_MUL_MEAT_1_hard): ... here.
+       * soft-fp/op-2.h (_FP_FRAC_HIGHBIT_DW_2): New macro.
+       (_FP_MUL_MEAT_DW_2_wide): Likewise.  Split out of ...
+       (_FP_MUL_MEAT_2_wide): ... here.
+       (_FP_MUL_MEAT_DW_2_wide_3mul): New macro.  Split out of ...
+       (_FP_MUL_MEAT_2_wide_3mul): ... here.
+       (_FP_MUL_MEAT_DW_2_gmp): New macro.  Split out of ...
+       (_FP_MUL_MEAT_2_gmp): ... here.
+       * soft-fp/op-4.h (_FP_FRAC_HIGHBIT_DW_4): New macro.
+       (_FP_MUL_MEAT_DW_4_wide): Likewise.  Split out of ...
+       (_FP_MUL_MEAT_4_wide): ... here.
+       (_FP_MUL_MEAT_DW_4_gmp): New macro.  Split out of ...
+       (_FP_MUL_MEAT_4_gmp): ... here.
+       * soft-fp/single.h (_FP_FRACTBITS_DW_S): New macro.
+       (_FP_WFRACBITS_DW_S): Likewise.
+       (_FP_WFRACXBITS_DW_S): Likewise.
+       (_FP_HIGHBIT_DW_S): Likewise.
+       (FP_FMA_S): Likewise.
+       (_FP_FRAC_HIGH_DW_S): Likewise.
+       * soft-fp/double.h (_FP_FRACTBITS_DW_D): New macro.
+       (_FP_WFRACBITS_DW_D): Likewise.
+       (_FP_WFRACXBITS_DW_D): Likewise.
+       (_FP_HIGHBIT_DW_D): Likewise.
+       (FP_FMA_D): Likewise.
+       (_FP_FRAC_HIGH_DW_D): Likewise.
+       * soft-fp/extended.h (_FP_FRACTBITS_DW_E): New macro.
+       (_FP_WFRACBITS_DW_E): Likewise.
+       (_FP_WFRACXBITS_DW_E): Likewise.
+       (_FP_HIGHBIT_DW_E): Likewise.
+       (FP_FMA_E): Likewise.
+       (_FP_FRAC_HIGH_DW_E): Likewise.
+       * soft-fp/quad.h (_FP_FRACTBITS_DW_Q): New macro.
+       (_FP_WFRACBITS_DW_Q): Likewise.
+       (_FP_WFRACXBITS_DW_Q): Likewise.
+       (_FP_HIGHBIT_DW_Q): Likewise.
+       (FP_FMA_Q): Likewise.
+       (_FP_FRAC_HIGH_DW_Q): Likewise.
+       * soft-fp/fmasf4.c: New file.
+       * soft-fp/fmadf4.c: Likewise.
+       * soft-fp/fmatf4.c: Likewise.
+
 2013-06-28  Liubov Dmitrieva  <liubov.dmitrieva@intel.com>
 
        * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
index 1f69593a2dfb81815958a3ed4e63577f5366b9bb..e985de998ef3a606393d50ab3992c6b461099044 100644 (file)
@@ -1,3 +1,21 @@
+2013-07-02  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #13304]
+       * sysdeps/mips/ieee754/s_fma.c: New file.
+       * sysdeps/mips/ieee754/s_fmaf.c: Likewise.
+       * sysdeps/mips/ieee754/s_fmal.c: Likewise.
+       * sysdeps/mips/mips32/Implies: Add mips/soft-fp.
+       * sysdeps/mips/mips64/n32/s_fma.c: Remove file.
+       * sysdeps/mips/mips64/n64/s_fma.c: Likewise.
+       * sysdeps/mips/mips64/soft-fp/sfp-machine.h (_FP_MUL_MEAT_DW_S):
+       New macro.
+       (_FP_MUL_MEAT_DW_D): Likewise.
+       (_FP_MUL_MEAT_DW_Q): Likewise.
+       * sysdeps/mips/soft-fp/sfp-machine.h (_FP_MUL_MEAT_DW_S): New
+       macro.
+       (_FP_MUL_MEAT_DW_D): Likewise.
+       (_FP_MUL_MEAT_DW_Q): Likewise.
+
 2013-06-28  Ryan S. Arnold  <rsa@linux.vnet.ibm.com>
 
        * sysdeps/mips/dl-procinfo.h (_dl_procinfo): Add TYPE parameter
diff --git a/ports/sysdeps/mips/ieee754/s_fma.c b/ports/sysdeps/mips/ieee754/s_fma.c
new file mode 100644 (file)
index 0000000..5741414
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __mips_hard_float
+# include <sysdeps/ieee754/dbl-64/s_fma.c>
+#else
+# include <soft-fp/fmadf4.c>
+#endif
diff --git a/ports/sysdeps/mips/ieee754/s_fmaf.c b/ports/sysdeps/mips/ieee754/s_fmaf.c
new file mode 100644 (file)
index 0000000..30bcdae
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __mips_hard_float
+# include <sysdeps/ieee754/dbl-64/s_fmaf.c>
+#else
+# include <soft-fp/fmasf4.c>
+#endif
diff --git a/ports/sysdeps/mips/ieee754/s_fmal.c b/ports/sysdeps/mips/ieee754/s_fmal.c
new file mode 100644 (file)
index 0000000..6b83e91
--- /dev/null
@@ -0,0 +1,7 @@
+#include <sgidefs.h>
+
+#if _MIPS_SIM == _ABIO32
+# error "long double fma being compiled for o32 ABI"
+#endif
+
+#include <soft-fp/fmatf4.c>
index 6473f2517c50228b980a5ac9d5e7cd5cc66430be..42df98f45e38a17ce04e45b566ea501ee3111272 100644 (file)
@@ -1,3 +1,4 @@
 mips/ieee754
+mips/soft-fp
 mips
 wordsize-32
diff --git a/ports/sysdeps/mips/mips64/n32/s_fma.c b/ports/sysdeps/mips/mips64/n32/s_fma.c
deleted file mode 100644 (file)
index 74a1e01..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-/* MIPS long double is implemented in software by fp-bit (as of GCC
-   4.7) without support for exceptions or rounding modes, so the fma
-   implementation in terms of long double is slow and will not produce
-   correctly rounding results.  */
-
-#include <sysdeps/ieee754/dbl-64/s_fma.c>
diff --git a/ports/sysdeps/mips/mips64/n64/s_fma.c b/ports/sysdeps/mips/mips64/n64/s_fma.c
deleted file mode 100644 (file)
index 74a1e01..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-/* MIPS long double is implemented in software by fp-bit (as of GCC
-   4.7) without support for exceptions or rounding modes, so the fma
-   implementation in terms of long double is slow and will not produce
-   correctly rounding results.  */
-
-#include <sysdeps/ieee754/dbl-64/s_fma.c>
index 1bdde5ace87fd4072d3a36badc8cd485f28a35e5..9cfd6fbb7b73dfdd44959105d61ef0b287f5fdbb 100644 (file)
 #define _FP_MUL_MEAT_Q(R,X,Y)                                  \
   _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
 
+#define _FP_MUL_MEAT_DW_S(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_1_imm(_FP_WFRACBITS_S,R,X,Y)
+#define _FP_MUL_MEAT_DW_D(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_DW_Q(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
 #define _FP_DIV_MEAT_S(R,X,Y)  _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
 #define _FP_DIV_MEAT_D(R,X,Y)  _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
 #define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
index 8ccfaa60fd3096edc560f19b1de3970a081291f5..a60bef7665a7504b8667d3127732fc0d0a081821 100644 (file)
 #define _FP_MUL_MEAT_Q(R,X,Y)                          \
   _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
 
+#define _FP_MUL_MEAT_DW_S(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_DW_D(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_DW_Q(R,X,Y)                               \
+  _FP_MUL_MEAT_DW_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
 #define _FP_DIV_MEAT_S(R,X,Y)  _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
 #define _FP_DIV_MEAT_D(R,X,Y)  _FP_DIV_MEAT_2_udiv(D,R,X,Y)
 #define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
index 759c2eb661adcaa7b81b615c469a80c0e32ed3ee..8653f69138036af38ee63a5b022c3535c71ba9fc 100644 (file)
 
 #if _FP_W_TYPE_SIZE < 64
 #define _FP_FRACTBITS_D                (2 * _FP_W_TYPE_SIZE)
+#define _FP_FRACTBITS_DW_D     (4 * _FP_W_TYPE_SIZE)
 #else
 #define _FP_FRACTBITS_D                _FP_W_TYPE_SIZE
+#define _FP_FRACTBITS_DW_D     (2 * _FP_W_TYPE_SIZE)
 #endif
 
 #define _FP_FRACBITS_D         53
 #define _FP_OVERFLOW_D         \
        ((_FP_W_TYPE)1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
 
+#define _FP_WFRACBITS_DW_D     (2 * _FP_WFRACBITS_D)
+#define _FP_WFRACXBITS_DW_D    (_FP_FRACTBITS_DW_D - _FP_WFRACBITS_DW_D)
+#define _FP_HIGHBIT_DW_D       \
+  ((_FP_W_TYPE)1 << (_FP_WFRACBITS_DW_D - 1) % _FP_W_TYPE_SIZE)
+
 typedef float DFtype __attribute__((mode(DF)));
 
 #if _FP_W_TYPE_SIZE < 64
@@ -149,6 +156,7 @@ union _FP_UNION_D
 #define FP_DIV_D(R,X,Y)                        _FP_DIV(D,2,R,X,Y)
 #define FP_SQRT_D(R,X)                 _FP_SQRT(D,2,R,X)
 #define _FP_SQRT_MEAT_D(R,S,T,X,Q)     _FP_SQRT_MEAT_2(R,S,T,X,Q)
+#define FP_FMA_D(R,X,Y,Z)              _FP_FMA(D,2,4,R,X,Y,Z)
 
 #define FP_CMP_D(r,X,Y,un)     _FP_CMP(D,2,r,X,Y,un)
 #define FP_CMP_EQ_D(r,X,Y)     _FP_CMP_EQ(D,2,r,X,Y)
@@ -160,6 +168,8 @@ union _FP_UNION_D
 #define _FP_FRAC_HIGH_D(X)     _FP_FRAC_HIGH_2(X)
 #define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2(X)
 
+#define _FP_FRAC_HIGH_DW_D(X)  _FP_FRAC_HIGH_4(X)
+
 #else
 
 union _FP_UNION_D
@@ -246,6 +256,7 @@ union _FP_UNION_D
 #define FP_DIV_D(R,X,Y)                        _FP_DIV(D,1,R,X,Y)
 #define FP_SQRT_D(R,X)                 _FP_SQRT(D,1,R,X)
 #define _FP_SQRT_MEAT_D(R,S,T,X,Q)     _FP_SQRT_MEAT_1(R,S,T,X,Q)
+#define FP_FMA_D(R,X,Y,Z)              _FP_FMA(D,1,2,R,X,Y,Z)
 
 /* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
    the target machine.  */
@@ -260,4 +271,6 @@ union _FP_UNION_D
 #define _FP_FRAC_HIGH_D(X)     _FP_FRAC_HIGH_1(X)
 #define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1(X)
 
+#define _FP_FRAC_HIGH_DW_D(X)  _FP_FRAC_HIGH_2(X)
+
 #endif /* W_TYPE_SIZE < 64 */
index 74927550eb21404e8cfb76e7c0201cff06c3955d..c8b1583086547782428c268f85624ea0216f5c3e 100644 (file)
 
 #if _FP_W_TYPE_SIZE < 64
 #define _FP_FRACTBITS_E         (4*_FP_W_TYPE_SIZE)
+#define _FP_FRACTBITS_DW_E     (8*_FP_W_TYPE_SIZE)
 #else
 #define _FP_FRACTBITS_E                (2*_FP_W_TYPE_SIZE)
+#define _FP_FRACTBITS_DW_E     (4*_FP_W_TYPE_SIZE)
 #endif
 
 #define _FP_FRACBITS_E         64
 #define _FP_OVERFLOW_E         \
        ((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
 
+#define _FP_WFRACBITS_DW_E     (2 * _FP_WFRACBITS_E)
+#define _FP_WFRACXBITS_DW_E    (_FP_FRACTBITS_DW_E - _FP_WFRACBITS_DW_E)
+#define _FP_HIGHBIT_DW_E       \
+  ((_FP_W_TYPE)1 << (_FP_WFRACBITS_DW_E - 1) % _FP_W_TYPE_SIZE)
+
 typedef float XFtype __attribute__((mode(XF)));
 
 #if _FP_W_TYPE_SIZE < 64
@@ -192,6 +199,7 @@ union _FP_UNION_E
 #define FP_MUL_E(R,X,Y)                _FP_MUL(E,4,R,X,Y)
 #define FP_DIV_E(R,X,Y)                _FP_DIV(E,4,R,X,Y)
 #define FP_SQRT_E(R,X)         _FP_SQRT(E,4,R,X)
+#define FP_FMA_E(R,X,Y,Z)      _FP_FMA(E,4,8,R,X,Y,Z)
 
 /*
  * Square root algorithms:
@@ -258,6 +266,8 @@ union _FP_UNION_E
 #define _FP_FRAC_HIGH_E(X)     (X##_f[2])
 #define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1])
 
+#define _FP_FRAC_HIGH_DW_E(X)  (X##_f[4])
+
 #else   /* not _FP_W_TYPE_SIZE < 64 */
 union _FP_UNION_E
 {
@@ -383,6 +393,7 @@ union _FP_UNION_E
 #define FP_MUL_E(R,X,Y)                _FP_MUL(E,2,R,X,Y)
 #define FP_DIV_E(R,X,Y)                _FP_DIV(E,2,R,X,Y)
 #define FP_SQRT_E(R,X)         _FP_SQRT(E,2,R,X)
+#define FP_FMA_E(R,X,Y,Z)      _FP_FMA(E,2,4,R,X,Y,Z)
 
 /*
  * Square root algorithms:
@@ -427,4 +438,6 @@ union _FP_UNION_E
 #define _FP_FRAC_HIGH_E(X)     (X##_f1)
 #define _FP_FRAC_HIGH_RAW_E(X) (X##_f0)
 
+#define _FP_FRAC_HIGH_DW_E(X)  (X##_f[2])
+
 #endif /* not _FP_W_TYPE_SIZE < 64 */
diff --git a/soft-fp/fmadf4.c b/soft-fp/fmadf4.c
new file mode 100644 (file)
index 0000000..ebdc2b1
--- /dev/null
@@ -0,0 +1,56 @@
+/* Implement fma using soft-fp.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "soft-fp.h"
+#include "double.h"
+
+double
+__fma (double a, double b, double c)
+{
+  FP_DECL_EX;
+  FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(C); FP_DECL_D(R);
+  double r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_D(A, a);
+  FP_UNPACK_D(B, b);
+  FP_UNPACK_D(C, c);
+  FP_FMA_D(R, A, B, C);
+  FP_PACK_D(r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+#ifndef __fma
+weak_alias (__fma, fma)
+#endif
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__fma, __fmal)
+weak_alias (__fmal, fmal)
+#endif
diff --git a/soft-fp/fmasf4.c b/soft-fp/fmasf4.c
new file mode 100644 (file)
index 0000000..e8d60fb
--- /dev/null
@@ -0,0 +1,51 @@
+/* Implement fmaf using soft-fp.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "soft-fp.h"
+#include "single.h"
+
+float
+__fmaf (float a, float b, float c)
+{
+  FP_DECL_EX;
+  FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(C); FP_DECL_S(R);
+  float r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_S(A, a);
+  FP_UNPACK_S(B, b);
+  FP_UNPACK_S(C, c);
+  FP_FMA_S(R, A, B, C);
+  FP_PACK_S(r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+#ifndef __fmaf
+weak_alias (__fmaf, fmaf)
+#endif
diff --git a/soft-fp/fmatf4.c b/soft-fp/fmatf4.c
new file mode 100644 (file)
index 0000000..cf48988
--- /dev/null
@@ -0,0 +1,49 @@
+/* Implement fmal using soft-fp.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "soft-fp.h"
+#include "quad.h"
+
+long double
+__fmal (long double a, long double b, long double c)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); FP_DECL_Q(R);
+  long double r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q(A, a);
+  FP_UNPACK_Q(B, b);
+  FP_UNPACK_Q(C, c);
+  FP_FMA_Q(R, A, B, C);
+  FP_PACK_Q(r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+weak_alias (__fmal, fmal)
index 8e05e2fab7e474ff2ae4180abaf4f3327b5018c5..a9ad0d62cd8c016a8de22dc4602b1cddba6aa5b6 100644 (file)
@@ -72,6 +72,7 @@ do {                                                  \
 #define _FP_FRAC_ZEROP_1(X)    (X##_f == 0)
 #define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs)
 #define _FP_FRAC_CLEAR_OVERP_1(fs,X)   (X##_f &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_1(fs,X)    (X##_f & _FP_HIGHBIT_DW_##fs)
 #define _FP_FRAC_EQ_1(X, Y)    (X##_f == Y##_f)
 #define _FP_FRAC_GE_1(X, Y)    (X##_f >= Y##_f)
 #define _FP_FRAC_GT_1(X, Y)    (X##_f > Y##_f)
@@ -137,9 +138,14 @@ do {                                                       \
 /* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
    multiplication immediately.  */
 
-#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)                         \
+#define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y)                      \
   do {                                                                 \
     R##_f = X##_f * Y##_f;                                             \
+  } while (0)
+
+#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)                         \
+  do {                                                                 \
+    _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y);                         \
     /* Normalize since we know where the msb of the multiplicands      \
        were (bit B), we know that the msb of the of the product is     \
        at either 2B or 2B-1.  */                                       \
@@ -148,10 +154,15 @@ do {                                                      \
 
 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 
+#define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit)               \
+  do {                                                                 \
+    doit(R##_f1, R##_f0, X##_f, Y##_f);                                        \
+  } while (0)
+
 #define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)                  \
   do {                                                                 \
-    _FP_W_TYPE _Z_f0, _Z_f1;                                           \
-    doit(_Z_f1, _Z_f0, X##_f, Y##_f);                                  \
+    _FP_FRAC_DECL_2(_Z);                                               \
+    _FP_MUL_MEAT_DW_1_wide(wfracbits, _Z, X, Y, doit);                 \
     /* Normalize since we know where the msb of the multiplicands      \
        were (bit B), we know that the msb of the of the product is     \
        at either 2B or 2B-1.  */                                       \
@@ -161,9 +172,10 @@ do {                                                       \
 
 /* Finally, a simple widening multiply algorithm.  What fun!  */
 
-#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)                                \
+#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y)                     \
   do {                                                                 \
-    _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1;         \
+    _FP_W_TYPE _xh, _xl, _yh, _yl;                                     \
+    _FP_FRAC_DECL_2(_a);                                               \
                                                                        \
     /* split the words in half */                                      \
     _xh = X##_f >> (_FP_W_TYPE_SIZE/2);                                        \
@@ -172,17 +184,23 @@ do {                                                      \
     _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);                \
                                                                        \
     /* multiply the pieces */                                          \
-    _z_f0 = _xl * _yl;                                                 \
+    R##_f0 = _xl * _yl;                                                        \
     _a_f0 = _xh * _yl;                                                 \
     _a_f1 = _xl * _yh;                                                 \
-    _z_f1 = _xh * _yh;                                                 \
+    R##_f1 = _xh * _yh;                                                        \
                                                                        \
     /* reassemble into two full words */                               \
     if ((_a_f0 += _a_f1) < _a_f1)                                      \
-      _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);                   \
+      R##_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);                  \
     _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2);                              \
     _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2);                              \
-    _FP_FRAC_ADD_2(_z, _z, _a);                                                \
+    _FP_FRAC_ADD_2(R, R, _a);                                          \
+  } while (0)
+
+#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)                                \
+  do {                                                                 \
+    _FP_FRAC_DECL_2(_z);                                               \
+    _FP_MUL_MEAT_DW_1_hard(wfracbits, _z, X, Y);                       \
                                                                        \
     /* normalize */                                                    \
     _FP_FRAC_SRS_2(_z, wfracbits - 1, 2*wfracbits);                    \
index 48e01d26dc4dc8c79747ac0da270f1a0920657c3..20088227e1915a4ce88b3ce82faa6d41e3cb4a16 100644 (file)
 #define _FP_FRAC_ZEROP_2(X)    ((X##_f1 | X##_f0) == 0)
 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 #define _FP_FRAC_CLEAR_OVERP_2(fs,X)   (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_2(fs,X)    \
+  (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
 #define _FP_FRAC_EQ_2(X, Y)    (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 #define _FP_FRAC_GT_2(X, Y)    \
   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 
 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 
-#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                  \
+#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)               \
   do {                                                                 \
-    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);     \
+    _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                          \
                                                                        \
-    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);        \
+    doit(_FP_FRAC_WORD_4(R,1), _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0);  \
     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                        \
     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                        \
-    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);        \
+    doit(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), X##_f1, Y##_f1);  \
                                                                        \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
-                   _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,             \
-                   _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
-                   _FP_FRAC_WORD_4(_z,1));                             \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
-                   _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,             \
-                   _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
-                   _FP_FRAC_WORD_4(_z,1));                             \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
+                   _FP_FRAC_WORD_4(R,1), 0, _b_f1, _b_f0,              \
+                   _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
+                   _FP_FRAC_WORD_4(R,1));                              \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
+                   _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0,              \
+                   _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
+                   _FP_FRAC_WORD_4(R,1));                              \
+  } while (0)
+
+#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                  \
+  do {                                                                 \
+    _FP_FRAC_DECL_4(_z);                                               \
+                                                                       \
+    _FP_MUL_MEAT_DW_2_wide(wfracbits, _z, X, Y, doit);                 \
                                                                        \
     /* Normalize since we know where the msb of the multiplicands      \
        were (bit B), we know that the msb of the of the product is     \
    Do only 3 multiplications instead of four. This one is for machines
    where multiplication is much more expensive than subtraction.  */
 
-#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)             \
+#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)          \
   do {                                                                 \
-    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);     \
+    _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                          \
     _FP_W_TYPE _d;                                                     \
     int _c1, _c2;                                                      \
                                                                        \
     _c1 = _b_f0 < X##_f0;                                              \
     _b_f1 = Y##_f0 + Y##_f1;                                           \
     _c2 = _b_f1 < Y##_f0;                                              \
-    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                   \
-    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);  \
+    doit(_d, _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0);                    \
+    doit(_FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1), _b_f0, _b_f1);    \
     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                        \
                                                                        \
     _b_f0 &= -_c2;                                                     \
     _b_f1 &= -_c1;                                                     \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
-                   _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,          \
-                   0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));   \
-    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),      \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
+                   _FP_FRAC_WORD_4(R,1), (_c1 & _c2), 0, _d,           \
+                   0, _FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1));     \
+    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),                \
                     _b_f0);                                            \
-    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),      \
+    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),                \
                     _b_f1);                                            \
-    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
-                   _FP_FRAC_WORD_4(_z,1),                              \
-                   0, _d, _FP_FRAC_WORD_4(_z,0));                      \
-    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
-                   _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);            \
-    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),      \
+    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
+                   _FP_FRAC_WORD_4(R,1),                               \
+                   0, _d, _FP_FRAC_WORD_4(R,0));                       \
+    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
+                   _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0);             \
+    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2),                \
                    _c_f1, _c_f0,                                       \
-                   _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
+                   _FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2));        \
+  } while (0)
+
+#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)             \
+  do {                                                                 \
+    _FP_FRAC_DECL_4(_z);                                               \
+                                                                       \
+    _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, _z, X, Y, doit);            \
                                                                        \
     /* Normalize since we know where the msb of the multiplicands      \
        were (bit B), we know that the msb of the of the product is     \
     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                    \
   } while (0)
 
-#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                         \
+#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)                      \
   do {                                                                 \
-    _FP_FRAC_DECL_4(_z);                                               \
     _FP_W_TYPE _x[2], _y[2];                                           \
     _x[0] = X##_f0; _x[1] = X##_f1;                                    \
     _y[0] = Y##_f0; _y[1] = Y##_f1;                                    \
                                                                        \
-    mpn_mul_n(_z_f, _x, _y, 2);                                                \
+    mpn_mul_n(R##_f, _x, _y, 2);                                       \
+  } while (0)
+
+#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                         \
+  do {                                                                 \
+    _FP_FRAC_DECL_4(_z);                                               \
+                                                                       \
+    _FP_MUL_MEAT_DW_2_gmp(wfracbits, _z, X, Y);                                \
                                                                        \
     /* Normalize since we know where the msb of the multiplicands      \
        were (bit B), we know that the msb of the of the product is     \
index fd31da90f855a97af7239b1b56972b09b38d152b..f16870d0f734b00eee0cf04e2df18015b63849b7 100644 (file)
 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_4(fs,X)    \
+  (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
 
 #define _FP_FRAC_EQ_4(X,Y)                             \
 
 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 
-#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                      \
+#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit)                   \
   do {                                                                     \
-    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);         \
+    _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                              \
     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);         \
                                                                            \
-    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
+    doit(_FP_FRAC_WORD_8(R,1), _FP_FRAC_WORD_8(R,0), X##_f[0], Y##_f[0]);   \
     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                \
     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                \
     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                \
     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                \
     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),           \
-                   _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,                   \
-                   0,0,_FP_FRAC_WORD_8(_z,1));                             \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),           \
-                   _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,                   \
-                   _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
-                   _FP_FRAC_WORD_8(_z,1));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),           \
-                   _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,                   \
-                   0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));         \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),           \
-                   _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,                   \
-                   _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
-                   _FP_FRAC_WORD_8(_z,2));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),           \
-                   _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,                   \
-                   _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
-                   _FP_FRAC_WORD_8(_z,2));                                 \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),             \
+                   _FP_FRAC_WORD_8(R,1), 0,_b_f1,_b_f0,                    \
+                   0,0,_FP_FRAC_WORD_8(R,1));                              \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),             \
+                   _FP_FRAC_WORD_8(R,1), 0,_c_f1,_c_f0,                    \
+                   _FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),              \
+                   _FP_FRAC_WORD_8(R,1));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),             \
+                   _FP_FRAC_WORD_8(R,2), 0,_d_f1,_d_f0,                    \
+                   0,_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2));           \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),             \
+                   _FP_FRAC_WORD_8(R,2), 0,_e_f1,_e_f0,                    \
+                   _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
+                   _FP_FRAC_WORD_8(R,2));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),             \
+                   _FP_FRAC_WORD_8(R,2), 0,_f_f1,_f_f0,                    \
+                   _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
+                   _FP_FRAC_WORD_8(R,2));                                  \
     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                \
     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                \
     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                \
     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),           \
-                   _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,                   \
-                   0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));         \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),           \
-                   _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,                   \
-                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
-                   _FP_FRAC_WORD_8(_z,3));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),           \
-                   _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,                   \
-                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
-                   _FP_FRAC_WORD_8(_z,3));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),           \
-                   _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,                   \
-                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
-                   _FP_FRAC_WORD_8(_z,3));                                 \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),             \
+                   _FP_FRAC_WORD_8(R,3), 0,_b_f1,_b_f0,                    \
+                   0,_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3));           \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),             \
+                   _FP_FRAC_WORD_8(R,3), 0,_c_f1,_c_f0,                    \
+                   _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
+                   _FP_FRAC_WORD_8(R,3));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),             \
+                   _FP_FRAC_WORD_8(R,3), 0,_d_f1,_d_f0,                    \
+                   _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
+                   _FP_FRAC_WORD_8(R,3));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),             \
+                   _FP_FRAC_WORD_8(R,3), 0,_e_f1,_e_f0,                    \
+                   _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
+                   _FP_FRAC_WORD_8(R,3));                                  \
     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                \
     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                \
     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                \
     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                \
     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),           \
-                   _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,                   \
-                   0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));         \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),           \
-                   _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,                   \
-                   _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
-                   _FP_FRAC_WORD_8(_z,4));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),           \
-                   _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,                   \
-                   _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
-                   _FP_FRAC_WORD_8(_z,4));                                 \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),           \
-                   _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,                   \
-                   0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));         \
-    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),           \
-                   _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,                   \
-                   _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
-                   _FP_FRAC_WORD_8(_z,5));                                 \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),             \
+                   _FP_FRAC_WORD_8(R,4), 0,_b_f1,_b_f0,                    \
+                   0,_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4));           \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),             \
+                   _FP_FRAC_WORD_8(R,4), 0,_c_f1,_c_f0,                    \
+                   _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
+                   _FP_FRAC_WORD_8(R,4));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),             \
+                   _FP_FRAC_WORD_8(R,4), 0,_d_f1,_d_f0,                    \
+                   _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
+                   _FP_FRAC_WORD_8(R,4));                                  \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),             \
+                   _FP_FRAC_WORD_8(R,5), 0,_e_f1,_e_f0,                    \
+                   0,_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5));           \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),             \
+                   _FP_FRAC_WORD_8(R,5), 0,_f_f1,_f_f0,                    \
+                   _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),              \
+                   _FP_FRAC_WORD_8(R,5));                                  \
     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                \
-    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),           \
+    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),             \
                    _b_f1,_b_f0,                                            \
-                   _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
+                   _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6));             \
+  } while (0)
+
+#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                      \
+  do {                                                                     \
+    _FP_FRAC_DECL_8(_z);                                                   \
+                                                                           \
+    _FP_MUL_MEAT_DW_4_wide(wfracbits, _z, X, Y, doit);                     \
                                                                            \
     /* Normalize since we know where the msb of the multiplicands          \
        were (bit B), we know that the msb of the of the product is         \
                    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
   } while (0)
 
+#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y)                          \
+  do {                                                                     \
+    mpn_mul_n(R##_f, _x_f, _y_f, 4);                                       \
+  } while (0)
+
 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                             \
   do {                                                                     \
     _FP_FRAC_DECL_8(_z);                                                   \
                                                                            \
-    mpn_mul_n(_z_f, _x_f, _y_f, 4);                                        \
+    _FP_MUL_MEAT_DW_4_gmp(wfracbits, _z, X, Y);                                    \
                                                                            \
     /* Normalize since we know where the msb of the multiplicands          \
        were (bit B), we know that the msb of the of the product is         \
index c4acb9916139f393a33ccac8e00ceab1d9f51029..bed1e21fd4d14d95ab0727d168d275042c4ae6c4 100644 (file)
@@ -847,6 +847,217 @@ do {                                                      \
 } while (0)
 
 
+/* Fused multiply-add.  The input values should be cooked.  */
+
+#define _FP_FMA(fs, wc, dwc, R, X, Y, Z)                       \
+do {                                                           \
+  FP_DECL_##fs(T);                                             \
+  T##_s = X##_s ^ Y##_s;                                       \
+  T##_e = X##_e + Y##_e + 1;                                   \
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))                       \
+  {                                                            \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):           \
+    switch (Z##_c)                                             \
+      {                                                                \
+      case FP_CLS_INF:                                         \
+      case FP_CLS_NAN:                                         \
+       R##_s = Z##_s;                                          \
+       _FP_FRAC_COPY_##wc(R, Z);                               \
+       R##_c = Z##_c;                                          \
+       break;                                                  \
+                                                               \
+      case FP_CLS_ZERO:                                                \
+       R##_c = FP_CLS_NORMAL;                                  \
+       R##_s = T##_s;                                          \
+       R##_e = T##_e;                                          \
+                                                               \
+       _FP_MUL_MEAT_##fs(R, X, Y);                             \
+                                                               \
+       if (_FP_FRAC_OVERP_##wc(fs, R))                         \
+         _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);          \
+       else                                                    \
+         R##_e--;                                              \
+       break;                                                  \
+                                                               \
+      case FP_CLS_NORMAL:;                                     \
+       _FP_FRAC_DECL_##dwc(TD);                                \
+       _FP_FRAC_DECL_##dwc(ZD);                                \
+       _FP_FRAC_DECL_##dwc(RD);                                \
+       _FP_MUL_MEAT_DW_##fs(TD, X, Y);                         \
+       R##_e = T##_e;                                          \
+       int tsh = _FP_FRAC_HIGHBIT_DW_##dwc(fs, TD) == 0;       \
+       T##_e -= tsh;                                           \
+       int ediff = T##_e - Z##_e;                              \
+       if (ediff >= 0)                                         \
+         {                                                     \
+           int shift = _FP_WFRACBITS_##fs - tsh - ediff;       \
+           if (shift <= -_FP_WFRACBITS_##fs)                   \
+             _FP_FRAC_SET_##dwc(ZD, _FP_MINFRAC_##dwc);        \
+           else                                                \
+             {                                                 \
+               _FP_FRAC_COPY_##dwc##_##wc(ZD, Z);              \
+               if (shift < 0)                                  \
+                 _FP_FRAC_SRS_##dwc(ZD, -shift,                \
+                                    _FP_WFRACBITS_DW_##fs);    \
+               else if (shift > 0)                             \
+                 _FP_FRAC_SLL_##dwc(ZD, shift);                \
+             }                                                 \
+           R##_s = T##_s;                                      \
+           if (T##_s == Z##_s)                                 \
+             _FP_FRAC_ADD_##dwc(RD, TD, ZD);                   \
+           else                                                \
+             {                                                 \
+               _FP_FRAC_SUB_##dwc(RD, TD, ZD);                 \
+               if (_FP_FRAC_NEGP_##dwc(RD))                    \
+                 {                                             \
+                   R##_s = Z##_s;                              \
+                   _FP_FRAC_SUB_##dwc(RD, ZD, TD);             \
+                 }                                             \
+             }                                                 \
+         }                                                     \
+       else                                                    \
+         {                                                     \
+           R##_e = Z##_e;                                      \
+           R##_s = Z##_s;                                      \
+           _FP_FRAC_COPY_##dwc##_##wc(ZD, Z);                  \
+           _FP_FRAC_SLL_##dwc(ZD, _FP_WFRACBITS_##fs);         \
+           int shift = -ediff - tsh;                           \
+           if (shift >= _FP_WFRACBITS_DW_##fs)                 \
+             _FP_FRAC_SET_##dwc(TD, _FP_MINFRAC_##dwc);        \
+           else if (shift > 0)                                 \
+             _FP_FRAC_SRS_##dwc(TD, shift,                     \
+                                _FP_WFRACBITS_DW_##fs);        \
+           if (Z##_s == T##_s)                                 \
+             _FP_FRAC_ADD_##dwc(RD, ZD, TD);                   \
+           else                                                \
+             _FP_FRAC_SUB_##dwc(RD, ZD, TD);                   \
+         }                                                     \
+       if (_FP_FRAC_ZEROP_##dwc(RD))                           \
+         {                                                     \
+           if (T##_s == Z##_s)                                 \
+             R##_s = Z##_s;                                    \
+           else                                                \
+             R##_s = (FP_ROUNDMODE == FP_RND_MINF);            \
+           _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc);            \
+           R##_c = FP_CLS_ZERO;                                \
+         }                                                     \
+       else                                                    \
+         {                                                     \
+           int rlz;                                            \
+           _FP_FRAC_CLZ_##dwc(rlz, RD);                        \
+           rlz -= _FP_WFRACXBITS_DW_##fs;                      \
+           R##_e -= rlz;                                       \
+           int shift = _FP_WFRACBITS_##fs - rlz;               \
+           if (shift > 0)                                      \
+             _FP_FRAC_SRS_##dwc(RD, shift,                     \
+                                _FP_WFRACBITS_DW_##fs);        \
+           else if (shift < 0)                                 \
+             _FP_FRAC_SLL_##dwc(RD, -shift);                   \
+           _FP_FRAC_COPY_##wc##_##dwc(R, RD);                  \
+           R##_c = FP_CLS_NORMAL;                              \
+         }                                                     \
+       break;                                                  \
+      }                                                                \
+    goto done_fma;                                             \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):                 \
+    _FP_CHOOSENAN(fs, wc, T, X, Y, '*');                       \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):              \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):                 \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):                        \
+    T##_s = X##_s;                                             \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):                 \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):              \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):             \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):               \
+    _FP_FRAC_COPY_##wc(T, X);                                  \
+    T##_c = X##_c;                                             \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):              \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):                 \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):                        \
+    T##_s = Y##_s;                                             \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):              \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):             \
+    _FP_FRAC_COPY_##wc(T, Y);                                  \
+    T##_c = Y##_c;                                             \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):                        \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):                        \
+    T##_s = _FP_NANSIGN_##fs;                                  \
+    T##_c = FP_CLS_NAN;                                                \
+    _FP_FRAC_SET_##wc(T, _FP_NANFRAC_##fs);                    \
+    FP_SET_EXCEPTION(FP_EX_INVALID);                           \
+    break;                                                     \
+                                                               \
+  default:                                                     \
+    abort();                                                   \
+  }                                                            \
+                                                               \
+  /* T = X * Y is zero, infinity or NaN.  */                   \
+  switch (_FP_CLS_COMBINE(T##_c, Z##_c))                       \
+  {                                                            \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):                 \
+    _FP_CHOOSENAN(fs, wc, R, T, Z, '+');                       \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):              \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):                 \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):                        \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):              \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):                        \
+    R##_s = T##_s;                                             \
+    _FP_FRAC_COPY_##wc(R, T);                                  \
+    R##_c = T##_c;                                             \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):                 \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):                        \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):             \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):                        \
+    R##_s = Z##_s;                                             \
+    _FP_FRAC_COPY_##wc(R, Z);                                  \
+    R##_c = Z##_c;                                             \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):                 \
+    if (T##_s == Z##_s)                                                \
+      {                                                                \
+       R##_s = Z##_s;                                          \
+       _FP_FRAC_COPY_##wc(R, Z);                               \
+       R##_c = Z##_c;                                          \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       R##_s = _FP_NANSIGN_##fs;                               \
+       R##_c = FP_CLS_NAN;                                     \
+       _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                 \
+       FP_SET_EXCEPTION(FP_EX_INVALID);                        \
+      }                                                                \
+    break;                                                     \
+                                                               \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):               \
+    if (T##_s == Z##_s)                                                \
+      R##_s = Z##_s;                                           \
+    else                                                       \
+      R##_s = (FP_ROUNDMODE == FP_RND_MINF);                   \
+    _FP_FRAC_COPY_##wc(R, Z);                                  \
+    R##_c = Z##_c;                                             \
+    break;                                                     \
+                                                               \
+  default:                                                     \
+    abort();                                                   \
+  }                                                            \
+ done_fma: ;                                                   \
+} while (0)
+
+
 /*
  * Main division routine.  The input values should be cooked.
  */
index f0aa07e74f1363c0fc420119c81bbee877ae36e2..9a16bf32844ae1b67dd8ceb7f5504f761006c7e1 100644 (file)
 
 #if _FP_W_TYPE_SIZE < 64
 #define _FP_FRACTBITS_Q         (4*_FP_W_TYPE_SIZE)
+#define _FP_FRACTBITS_DW_Q     (8*_FP_W_TYPE_SIZE)
 #else
 #define _FP_FRACTBITS_Q                (2*_FP_W_TYPE_SIZE)
+#define _FP_FRACTBITS_DW_Q     (4*_FP_W_TYPE_SIZE)
 #endif
 
 #define _FP_FRACBITS_Q         113
 #define _FP_OVERFLOW_Q         \
        ((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE))
 
+#define _FP_WFRACBITS_DW_Q     (2 * _FP_WFRACBITS_Q)
+#define _FP_WFRACXBITS_DW_Q    (_FP_FRACTBITS_DW_Q - _FP_WFRACBITS_DW_Q)
+#define _FP_HIGHBIT_DW_Q       \
+  ((_FP_W_TYPE)1 << (_FP_WFRACBITS_DW_Q - 1) % _FP_W_TYPE_SIZE)
+
 typedef float TFtype __attribute__((mode(TF)));
 
 #if _FP_W_TYPE_SIZE < 64
@@ -155,6 +162,7 @@ union _FP_UNION_Q
 #define FP_DIV_Q(R,X,Y)                        _FP_DIV(Q,4,R,X,Y)
 #define FP_SQRT_Q(R,X)                 _FP_SQRT(Q,4,R,X)
 #define _FP_SQRT_MEAT_Q(R,S,T,X,Q)     _FP_SQRT_MEAT_4(R,S,T,X,Q)
+#define FP_FMA_Q(R,X,Y,Z)              _FP_FMA(Q,4,8,R,X,Y,Z)
 
 #define FP_CMP_Q(r,X,Y,un)     _FP_CMP(Q,4,r,X,Y,un)
 #define FP_CMP_EQ_Q(r,X,Y)     _FP_CMP_EQ(Q,4,r,X,Y)
@@ -166,6 +174,8 @@ union _FP_UNION_Q
 #define _FP_FRAC_HIGH_Q(X)     _FP_FRAC_HIGH_4(X)
 #define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_4(X)
 
+#define _FP_FRAC_HIGH_DW_Q(X)  _FP_FRAC_HIGH_8(X)
+
 #else   /* not _FP_W_TYPE_SIZE < 64 */
 union _FP_UNION_Q
 {
@@ -256,6 +266,7 @@ union _FP_UNION_Q
 #define FP_DIV_Q(R,X,Y)                        _FP_DIV(Q,2,R,X,Y)
 #define FP_SQRT_Q(R,X)                 _FP_SQRT(Q,2,R,X)
 #define _FP_SQRT_MEAT_Q(R,S,T,X,Q)     _FP_SQRT_MEAT_2(R,S,T,X,Q)
+#define FP_FMA_Q(R,X,Y,Z)              _FP_FMA(Q,2,4,R,X,Y,Z)
 
 #define FP_CMP_Q(r,X,Y,un)     _FP_CMP(Q,2,r,X,Y,un)
 #define FP_CMP_EQ_Q(r,X,Y)     _FP_CMP_EQ(Q,2,r,X,Y)
@@ -267,4 +278,6 @@ union _FP_UNION_Q
 #define _FP_FRAC_HIGH_Q(X)     _FP_FRAC_HIGH_2(X)
 #define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_2(X)
 
+#define _FP_FRAC_HIGH_DW_Q(X)  _FP_FRAC_HIGH_4(X)
+
 #endif /* not _FP_W_TYPE_SIZE < 64 */
index dec0031e9a925485d10dfbda967e5e9c27f70c53..c94f31f99b0cb63109166e10e86042e034ad158c 100644 (file)
 
 #define _FP_FRACTBITS_S                _FP_W_TYPE_SIZE
 
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRACTBITS_DW_S    (2 * _FP_W_TYPE_SIZE)
+#else
+# define _FP_FRACTBITS_DW_S    _FP_W_TYPE_SIZE
+#endif
+
 #define _FP_FRACBITS_S         24
 #define _FP_FRACXBITS_S                (_FP_FRACTBITS_S - _FP_FRACBITS_S)
 #define _FP_WFRACBITS_S                (_FP_WORKBITS + _FP_FRACBITS_S)
 #define _FP_IMPLBIT_SH_S       ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
 #define _FP_OVERFLOW_S         ((_FP_W_TYPE)1 << (_FP_WFRACBITS_S))
 
+#define _FP_WFRACBITS_DW_S     (2 * _FP_WFRACBITS_S)
+#define _FP_WFRACXBITS_DW_S    (_FP_FRACTBITS_DW_S - _FP_WFRACBITS_DW_S)
+#define _FP_HIGHBIT_DW_S       \
+  ((_FP_W_TYPE)1 << (_FP_WFRACBITS_DW_S - 1) % _FP_W_TYPE_SIZE)
+
 /* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
    chosen by the target machine.  */
 
@@ -139,6 +150,12 @@ union _FP_UNION_S
 #define FP_SQRT_S(R,X)                 _FP_SQRT(S,1,R,X)
 #define _FP_SQRT_MEAT_S(R,S,T,X,Q)     _FP_SQRT_MEAT_1(R,S,T,X,Q)
 
+#if _FP_W_TYPE_SIZE < 64
+# define FP_FMA_S(R, X, Y, Z)  _FP_FMA(S, 1, 2, R, X, Y, Z)
+#else
+# define FP_FMA_S(R, X, Y, Z)  _FP_FMA(S, 1, 1, R, X, Y, Z)
+#endif
+
 #define FP_CMP_S(r,X,Y,un)     _FP_CMP(S,1,r,X,Y,un)
 #define FP_CMP_EQ_S(r,X,Y)     _FP_CMP_EQ(S,1,r,X,Y)
 #define FP_CMP_UNORD_S(r,X,Y)  _FP_CMP_UNORD(S,1,r,X,Y)
@@ -148,3 +165,9 @@ union _FP_UNION_S
 
 #define _FP_FRAC_HIGH_S(X)     _FP_FRAC_HIGH_1(X)
 #define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1(X)
+
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_2(X)
+#else
+# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_1(X)
+#endif