halfulp-fma mpexp-fma \
mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
+libm-sysdep_routines += x86-math-features
+
CFLAGS-doasin-fma.c = -mfma -mavx2
CFLAGS-dosincos-fma.c = -mfma -mavx2
CFLAGS-e_asin-fma.c = -mfma -mavx2
<http://www.gnu.org/licenses/>. */
#include <init-arch.h>
+#include <x86-math-features.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ unsigned int features = __x86_math_features (); /* Feature bit mask. */
- if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ if ((features & x86_math_feature_fma) /* First choice needs FMA ... */
+ && (features & x86_math_feature_avx2)) /* ... together with AVX2. */
return OPTIMIZE (fma);
-
- if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+ if (features & x86_math_feature_fma4) /* Second choice: FMA4. */
return OPTIMIZE (fma4);
-
- if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+ if (features & x86_math_feature_avx) /* Third choice: AVX. */
return OPTIMIZE (avx);
return OPTIMIZE (sse2);
<http://www.gnu.org/licenses/>. */
#include <init-arch.h>
+#include <x86-math-features.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ unsigned int features = __x86_math_features (); /* Feature bit mask. */
- if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ if ((features & x86_math_feature_fma) /* FMA variant needs FMA ... */
+ && (features & x86_math_feature_avx2)) /* ... together with AVX2. */
return OPTIMIZE (fma);
return OPTIMIZE (sse2);
<http://www.gnu.org/licenses/>. */
#include <init-arch.h>
+#include <x86-math-features.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ unsigned int features = __x86_math_features (); /* Feature bit mask. */
- if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ if ((features & x86_math_feature_fma) /* First choice needs FMA ... */
+ && (features & x86_math_feature_avx2)) /* ... together with AVX2. */
return OPTIMIZE (fma);
-
- if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+ if (features & x86_math_feature_fma4) /* Second choice: FMA4. */
return OPTIMIZE (fma4);
return OPTIMIZE (sse2);
<http://www.gnu.org/licenses/>. */
#include <init-arch.h>
+#include <x86-math-features.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ unsigned int features = __x86_math_features (); /* Feature bit mask. */
- if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ if (features & x86_math_feature_sse41) /* SSE4.1 variant if available. */
return OPTIMIZE (sse41);
return OPTIMIZE (c);
#include <config.h>
#include <math.h>
#include <init-arch.h>
+#include <x86-math-features.h>
#include <libm-alias-double.h>
extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
}
-libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable)
- ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+/* Resolve __fma to the FMA3 variant when FMA is usable, otherwise to
+   the FMA4 variant when FMA4 is usable, otherwise to the SSE2 version.  */
+libm_ifunc (__fma, (__x86_math_features () & x86_math_feature_fma)
+ ? __fma_fma3 : ((__x86_math_features () & x86_math_feature_fma4)
? __fma_fma4 : __fma_sse2));
libm_alias_double (__fma, fma)
#include <config.h>
#include <math.h>
#include <init-arch.h>
+#include <x86-math-features.h>
#include <libm-alias-float.h>
extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
}
-libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable)
- ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+/* Resolve __fmaf to the FMA3 variant when FMA is usable, otherwise to
+   the FMA4 variant when FMA4 is usable, otherwise to the SSE2 version.  */
+libm_ifunc (__fmaf, (__x86_math_features () & x86_math_feature_fma)
+ ? __fmaf_fma3 : ((__x86_math_features () & x86_math_feature_fma4)
? __fmaf_fma4 : __fmaf_sse2));
libm_alias_float (__fma, fma)
--- /dev/null
+/* Initialize CPU features for use by the math library.
+ This file is part of the GNU C Library.
+ Copyright (C) 2008-2018 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <x86-math-features.h>
+#include <atomic.h>
+#include <cpuid.h>
+#include <cpu-features.h>
+#include <stdbool.h>
+
+/* Cached feature mask.  Zero means that it has not been computed yet;
+   every computed mask contains x86_math_feature_initialized and is
+   therefore non-zero.  */
+static unsigned int features;
+
+/* Return the mask of x86_math_feature_* flags describing which
+   instruction set extensions the math library may use.  The mask is
+   computed with CPUID and XGETBV on the first call and cached in
+   FEATURES afterwards.  Only relaxed atomic accesses are used, so
+   concurrent first calls may each compute and store the mask; this is
+   harmless provided that they all arrive at the same value.  */
+unsigned int
+__x86_math_features (void)
+{
+  unsigned int features_local = atomic_load_relaxed (&features);
+  if (features_local != 0)
+    /* At least the initialization bit is set, which means that we
+       have a proper value.  */
+    return features_local;
+
+  /* Perform initialization.  */
+  features_local = x86_math_feature_initialized;
+
+  unsigned int eax, ebx, ecx, edx;
+  unsigned int max_cpuid;
+  __cpuid (0, max_cpuid, ebx, ecx, edx);
+  /* The vendor string "AuthenticAMD" is returned in EBX, EDX, ECX.  */
+  bool cpu_amd
+    = ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65;
+
+  /* Leaf 1 holds the SSE4.1, AVX, FMA and OSXSAVE bits.  Query it
+     whenever it exists: only the AVX2 bit lives in leaf 7, so the
+     leaf 1 features must not depend on leaf 7 being available.  */
+  if (max_cpuid >= 1)
+    {
+      __cpuid (1, eax, ebx, ecx, edx);
+      bool flag_fma = ecx & bit_cpu_FMA;
+      bool flag_osxsave = ecx & bit_cpu_OSXSAVE;
+      bool flag_avx = ecx & bit_cpu_AVX;
+      bool flag_sse41 = ecx & bit_cpu_SSE4_1;
+
+      if (flag_sse41)
+        features_local |= x86_math_feature_sse41;
+
+      /* Without leaf 7, AVX2 cannot be reported.  */
+      bool flag_avx2 = false;
+      if (max_cpuid >= 7)
+        {
+          __cpuid_count (7, 0, eax, ebx, ecx, edx);
+          flag_avx2 = ebx & bit_cpu_AVX2;
+        }
+
+      /* XGETBV is only executed when OSXSAVE is reported.  */
+      if (flag_osxsave)
+        {
+          unsigned int xcrlow;
+          unsigned int xcrhigh;
+          asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+          bool ymm_xmm_usable
+            = (xcrlow & (bit_YMM_state | bit_XMM_state))
+              == (bit_YMM_state | bit_XMM_state);
+
+          /* Is YMM and XMM state usable?  AVX2, FMA and FMA4 are
+             only reported together with AVX.  */
+          if (ymm_xmm_usable && flag_avx)
+            {
+              features_local |= x86_math_feature_avx;
+              if (flag_avx2)
+                features_local |= x86_math_feature_avx2;
+              if (flag_fma)
+                features_local |= x86_math_feature_fma;
+
+              /* FMA4 is reported in extended leaf 0x80000001 and is
+                 only looked up on AMD CPUs.  */
+              if (cpu_amd)
+                {
+                  __cpuid (0x80000000, eax, ebx, ecx, edx);
+                  if (eax >= 0x80000001)
+                    {
+                      __cpuid (0x80000001, eax, ebx, ecx, edx);
+                      bool flag_fma4 = ecx & bit_cpu_FMA4;
+                      if (flag_fma4)
+                        features_local |= x86_math_feature_fma4;
+                    }
+                }
+            }
+        }
+    }
+
+  atomic_store_relaxed (&features, features_local);
+  return features_local;
+}
--- /dev/null
+#ifndef X86_MATH_FEATURES_H
+#define X86_MATH_FEATURES_H
+
+enum /* Bits of the mask returned by __x86_math_features. */
+ {
+ x86_math_feature_initialized = 1 << 0, /* Always set; mask is never 0. */
+ x86_math_feature_avx = 1 << 1, /* AVX, with XMM/YMM state enabled. */
+ x86_math_feature_avx2 = 1 << 2, /* AVX2; only set with the AVX bit. */
+ x86_math_feature_fma = 1 << 3, /* FMA (FMA3); only set with AVX. */
+ x86_math_feature_fma4 = 1 << 4, /* AMD FMA4; only set with AVX. */
+ x86_math_feature_sse41 = 1 << 5, /* SSE4.1. */
+ };
+
+/* Return a combination of flags x86_math_feature_* above.  The result
+   always includes x86_math_feature_initialized.  The function is
+   declared const, so the compiler may merge repeated calls.  */
+unsigned int __x86_math_features (void)
+ __attribute__ ((const)) attribute_hidden;
+
+#endif /* X86_MATH_FEATURES_H */