/* Initialize CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2008-2017 Free Software Foundation, Inc.
+ Copyright (C) 2008-2018 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
#include <cpuid.h>
#include <cpu-features.h>
+#include <dl-hwcap.h>
+#include <libc-pointer-arith.h>
+
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE tune
+# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
+# include <elf/dl-tunables.h>
+
+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
+ attribute_hidden;
+#endif
static void
get_common_indeces (struct cpu_features *cpu_features,
}
}
}
+
+ /* For _dl_runtime_resolve, set xsave_state_size to xsave area
+ size + integer register save size and align it to 64 bytes. */
+ if (cpu_features->max_cpuid >= 0xd)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
+ if (ebx != 0)
+ {
+ unsigned int xsave_state_full_size
+ = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+
+ cpu_features->xsave_state_size
+ = xsave_state_full_size;
+ cpu_features->xsave_state_full_size
+ = xsave_state_full_size;
+
+ __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+
+ /* Check if XSAVEC is available. */
+ if ((eax & (1 << 1)) != 0)
+ {
+ unsigned int xstate_comp_offsets[32];
+ unsigned int xstate_comp_sizes[32];
+ unsigned int i;
+
+ xstate_comp_offsets[0] = 0;
+ xstate_comp_offsets[1] = 160;
+ xstate_comp_offsets[2] = 576;
+ xstate_comp_sizes[0] = 160;
+ xstate_comp_sizes[1] = 256;
+
+ for (i = 2; i < 32; i++)
+ {
+ if ((STATE_SAVE_MASK & (1 << i)) != 0)
+ {
+ __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+ xstate_comp_sizes[i] = eax;
+ }
+ else
+ {
+ ecx = 0;
+ xstate_comp_sizes[i] = 0;
+ }
+
+ if (i > 2)
+ {
+ xstate_comp_offsets[i]
+ = (xstate_comp_offsets[i - 1]
+ + xstate_comp_sizes[i -1]);
+ if ((ecx & (1 << 1)) != 0)
+ xstate_comp_offsets[i]
+ = ALIGN_UP (xstate_comp_offsets[i], 64);
+ }
+ }
+
+ /* Use XSAVEC. */
+ unsigned int size
+ = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+ if (size)
+ {
+ cpu_features->xsave_state_size
+ = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+ cpu_features->feature[index_arch_XSAVEC_Usable]
+ |= bit_arch_XSAVEC_Usable;
+ }
+ }
+ }
+ }
}
}
else
cpu_features->feature[index_arch_Prefer_No_AVX512]
|= bit_arch_Prefer_No_AVX512;
-
- /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
- If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. */
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
- |= bit_arch_Use_dl_runtime_resolve_slow;
- if (cpu_features->max_cpuid >= 0xd)
- {
- unsigned int eax;
-
- __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
- if ((eax & (1 << 2)) != 0)
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
- |= bit_arch_Use_dl_runtime_resolve_opt;
- }
}
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
cpu_features->family = family;
cpu_features->model = model;
cpu_features->kind = kind;
+
+#if HAVE_TUNABLES
+ TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+ cpu_features->non_temporal_threshold
+ = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
+ cpu_features->data_cache_size
+ = TUNABLE_GET (x86_data_cache_size, long int, NULL);
+ cpu_features->shared_cache_size
+ = TUNABLE_GET (x86_shared_cache_size, long int, NULL);
+#endif
+
+ /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86. */
+#if !HAVE_TUNABLES && defined SHARED
+ /* The glibc.tune.hwcap_mask tunable is initialized already, so no need to do
+ this. */
+ GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
+#endif
+
+#ifdef __x86_64__
+ GLRO(dl_hwcap) = HWCAP_X86_64;
+ if (cpu_features->kind == arch_kind_intel)
+ {
+ const char *platform = NULL;
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+ platform = "xeon_phi";
+ }
+ else
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)
+ && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)
+ && CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+ GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
+ }
+ }
+
+ if (platform == NULL
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+ && CPU_FEATURES_CPU_P (cpu_features, BMI1)
+ && CPU_FEATURES_CPU_P (cpu_features, BMI2)
+ && CPU_FEATURES_CPU_P (cpu_features, LZCNT)
+ && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
+ && CPU_FEATURES_CPU_P (cpu_features, POPCNT))
+ platform = "haswell";
+
+ if (platform != NULL)
+ GLRO(dl_platform) = platform;
+ }
+#else
+ GLRO(dl_hwcap) = 0;
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE2))
+ GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, I686))
+ GLRO(dl_platform) = "i686";
+ else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
+ GLRO(dl_platform) = "i586";
+#endif
}