You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
+ <https://www.gnu.org/licenses/>. */
#ifndef _FENV_LIBC_H
#define _FENV_LIBC_H 1
extern const fenv_t *__fe_mask_env (void) attribute_hidden;
+/* If the old env had any enabled exceptions and the new env has no enabled
+ exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
+ FPU to run faster because it always takes the default action and can not
+ generate SIGFPE. */
+#define __TEST_AND_ENTER_NON_STOP(old, new) \
+ do { \
+ if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
+ (void) __fe_mask_env (); \
+ } while (0)
+
+/* If the old env has no enabled exceptions and the new env has any enabled
+ exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
+ hardware into "precise mode" and may cause the FPU to run slower on some
+ hardware. */
+#define __TEST_AND_EXIT_NON_STOP(old, new) \
+ do { \
+ if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
+ (void) __fe_nomask_env_priv (); \
+ } while (0)
+
/* The sticky bits in the FPSCR indicating exceptions have occurred. */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
#define fegetenv_register() __builtin_mffs()
/* Equivalent to fegetenv_register, but only returns bits for
- status, exception enables, and mode. */
-
-#define fegetenv_status_ISA300() \
+ status, exception enables, and mode.
+ Nicely, it turns out that the 'mffsl' instruction will decode to
+ 'mffs' on architectures older than "power9" because the additional
+ bits set for 'mffsl' are "don't care" for 'mffs'. 'mffs' is a superset
+ of 'mffsl'. */
+#define fegetenv_control() \
({register double __fr; \
__asm__ __volatile__ ( \
".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
__fr; \
})
+#define __fe_mffscrn(rn) \
+ ({register fenv_union_t __fr; \
+ if (__builtin_constant_p (rn)) \
+ __asm__ __volatile__ ( \
+ ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
+ : "=f" (__fr.fenv) : "i" (rn)); \
+ else \
+ { \
+ __fr.l = (rn); \
+ __asm__ __volatile__ ( \
+ ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
+ : "=f" (__fr.fenv) : "f" (__fr.fenv)); \
+ } \
+ __fr.fenv; \
+ })
+
+/* Like fegetenv_control, but also sets the rounding mode. */
#ifdef _ARCH_PWR9
-# define fegetenv_status() fegetenv_status_ISA300()
-#elif defined __BUILTIN_CPU_SUPPORTS__
-# define fegetenv_status() \
- (__glibc_likely (__builtin_cpu_supports ("arch_3_00")) \
- ? fegetenv_status_ISA300() \
- : fegetenv_register() \
- )
+#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
-# define fegetenv_status() fegetenv_register ()
+/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
+ but not sufficient, because it does not set the rounding mode.
+ Explicitly set the rounding mode when 'mffscrn' actually doesn't. */
+#define fegetenv_and_set_rn(rn) \
+ ({register fenv_union_t __fr; \
+ __fr.fenv = __fe_mffscrn (rn); \
+ if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
+ __fesetround_inline (rn); \
+ __fr.fenv; \
+ })
#endif
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */
__builtin_mtfsf (0xff, d); \
} while(0)
+/* Set the last 2 nibbles of the FPSCR, which contain the
+ exception enables and the rounding mode.
+ 'fegetenv_control' retrieves these bits by reading the FPSCR. */
+#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env));
+
/* This very handy macro:
- Sets the rounding mode to 'round to nearest';
- Sets the processor into IEEE mode; and
static inline int
__fesetround_inline (int round)
{
- if ((unsigned int) round < 2)
+#ifdef _ARCH_PWR9
+ __fe_mffscrn (round);
+#else
+ if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+ __fe_mffscrn (round);
+ else if ((unsigned int) round < 2)
{
asm volatile ("mtfsb0 30");
if ((unsigned int) round == 0)
else
asm volatile ("mtfsb1 31");
}
-
+#endif
return 0;
}
static inline void
__fesetround_inline_nocheck (const int round)
{
- asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#ifdef _ARCH_PWR9
+ __fe_mffscrn (round);
+#else
+ if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+ __fe_mffscrn (round);
+ else
+ asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#endif
}
+#define FPSCR_MASK(bit) (1 << (31 - (bit)))
+
/* Definitions of all the FPSCR bit numbers */
enum {
FPSCR_FX = 0, /* exception summary */
+#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
FPSCR_FEX, /* enabled exception summary */
+#define FPSCR_FEX_MASK (FPSCR_MASK FPSCR_FEX))
FPSCR_VX, /* invalid operation summary */
+#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
FPSCR_OX, /* overflow */
+#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
FPSCR_UX, /* underflow */
+#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
FPSCR_ZX, /* zero divide */
+#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
FPSCR_XX, /* inexact */
+#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
FPSCR_VXSNAN, /* invalid operation for sNaN */
+#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
FPSCR_VXISI, /* invalid operation for Inf-Inf */
+#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
FPSCR_VXIDI, /* invalid operation for Inf/Inf */
+#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
FPSCR_VXZDZ, /* invalid operation for 0/0 */
+#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
FPSCR_VXIMZ, /* invalid operation for Inf*0 */
+#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
FPSCR_VXVC, /* invalid operation for invalid compare */
+#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
FPSCR_FR, /* fraction rounded [fraction was incremented by round] */
+#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
FPSCR_FI, /* fraction inexact */
+#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
FPSCR_FPRF_C, /* result class descriptor */
+#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
FPSCR_FPRF_FL, /* result less than (usually, less than 0) */
+#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
FPSCR_FPRF_FG, /* result greater than */
+#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
FPSCR_FPRF_FE, /* result equal to */
+#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
FPSCR_FPRF_FU, /* result unordered */
+#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
FPSCR_20, /* reserved */
FPSCR_VXSOFT, /* invalid operation set by software */
+#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
FPSCR_VXSQRT, /* invalid operation for square root */
+#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
FPSCR_VXCVI, /* invalid operation for invalid integer convert */
+#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
FPSCR_VE, /* invalid operation exception enable */
+#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
FPSCR_OE, /* overflow exception enable */
+#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
FPSCR_UE, /* underflow exception enable */
+#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
FPSCR_ZE, /* zero divide exception enable */
+#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
FPSCR_XE, /* inexact exception enable */
+#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
FPSCR_29, /* Reserved in ISA 2.05 */
+#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
- FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */
+ FPSCR_NI, /* non-IEEE mode (typically, no denormalised numbers) */
+#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
/* the remaining two least-significant bits keep the rounding mode */
+ FPSCR_RN_hi,
+#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
+ FPSCR_RN_lo
+#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};
+#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
+#define FPSCR_ENABLES_MASK \
+ (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
+#define FPSCR_BASIC_EXCEPTIONS_MASK \
+ (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
+#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
+ FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
+ FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
+ FPSCR_VXCVI_MASK)
+#define FPSCR_FPRF_MASK \
+ (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
+ FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
+#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
+#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
+
+/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
+ in the FPSCR, albeit shifted to different but corresponding locations.
+ Similarly, the exception indicator bits in the FPSCR correspond one-to-one
+ with the exception enable bits. It is thus possible to map the FENV(1)
+ exceptions directly to the FPSCR enables with a simple mask and shift,
+ and vice versa. */
+#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
+
static inline int
fenv_reg_to_exceptions (unsigned long long l)
{
- int result = 0;
- if (l & (1 << (31 - FPSCR_XE)))
- result |= FE_INEXACT;
- if (l & (1 << (31 - FPSCR_ZE)))
- result |= FE_DIVBYZERO;
- if (l & (1 << (31 - FPSCR_UE)))
- result |= FE_UNDERFLOW;
- if (l & (1 << (31 - FPSCR_OE)))
- result |= FE_OVERFLOW;
- if (l & (1 << (31 - FPSCR_VE)))
- result |= FE_INVALID;
- return result;
+ return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
+}
+
+static inline unsigned long long
+fenv_exceptions_to_reg (int excepts)
+{
+ return (unsigned long long)
+ (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}
#ifdef _ARCH_PWR6