diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index f30918dd3bee0317f868edc1ba8448f7a209991f..0fc3674c4b81f85af9a19228ed2b0e83e41511ab 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1,6 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2008-2017 Free Software Foundation, Inc.
+   Copyright (C) 2008-2018 Free Software Foundation, Inc.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -18,6 +18,17 @@
 
 #include <cpuid.h>
 #include <cpu-features.h>
+#include <dl-hwcap.h>
+#include <libc-pointer-arith.h>
+
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE tune
+# include <unistd.h>           /* Get STDOUT_FILENO for _dl_printf.  */
+# include <elf/dl-tunables.h>
+
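+/* Defined in sysdeps/x86/cpu-tunables.c; parses the glibc.tune.hwcaps
+   string and updates the usable CPU feature bits accordingly.  */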
+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
+  attribute_hidden;
+#endif
 
 static void
 get_common_indeces (struct cpu_features *cpu_features,
@@ -93,6 +104,76 @@ get_common_indeces (struct cpu_features *cpu_features,
                }
            }
        }
+
+      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
+        size + integer register save size and align it to 64 bytes.  */
+      if (cpu_features->max_cpuid >= 0xd)
+       {
+         unsigned int eax, ebx, ecx, edx;
+
+         __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
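+         /* CPUID.(EAX=0DH, ECX=0):EBX is the size in bytes of the XSAVE
+            area required by the feature bits currently enabled in XCR0.  */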
+         if (ebx != 0)
+           {
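+             /* STATE_SAVE_OFFSET accounts for the integer register save
+                area which _dl_runtime_resolve allocates together with
+                the xsave area (see sysdeps/x86/sysdep.h).  */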
+             unsigned int xsave_state_full_size
+               = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+
+             cpu_features->xsave_state_size
+               = xsave_state_full_size;
+             cpu_features->xsave_state_full_size
+               = xsave_state_full_size;
+
+             __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+
+             /* Check if XSAVEC is available.  */
+             if ((eax & (1 << 1)) != 0)
+               {
+                 unsigned int xstate_comp_offsets[32];
+                 unsigned int xstate_comp_sizes[32];
+                 unsigned int i;
+
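+                 /* Offsets and sizes of the fixed legacy components:
+                    x87 state occupies bytes 0-159 of the FXSAVE area,
+                    the XMM registers the 256 bytes at offset 160, and
+                    compacted extended components start at offset 576
+                    (512-byte legacy area + 64-byte XSAVE header).  */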
+                 xstate_comp_offsets[0] = 0;
+                 xstate_comp_offsets[1] = 160;
+                 xstate_comp_offsets[2] = 576;
+                 xstate_comp_sizes[0] = 160;
+                 xstate_comp_sizes[1] = 256;
+
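+                 /* Compute the compacted offset and size of each
+                    extended component selected by STATE_SAVE_MASK.
+                    CPUID.(EAX=0DH, ECX=i):EAX is the size of component
+                    i; ECX bit 1 set means the component is aligned to
+                    64 bytes in the compacted format.  */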
+                 for (i = 2; i < 32; i++)
+                   {
+                     if ((STATE_SAVE_MASK & (1 << i)) != 0)
+                       {
+                         __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+                         xstate_comp_sizes[i] = eax;
+                       }
+                     else
+                       {
+                         ecx = 0;
+                         xstate_comp_sizes[i] = 0;
+                       }
+
+                     if (i > 2)
+                       {
+                         xstate_comp_offsets[i]
+                           = (xstate_comp_offsets[i - 1]
+                              + xstate_comp_sizes[i -1]);
+                         if ((ecx & (1 << 1)) != 0)
+                           xstate_comp_offsets[i]
+                             = ALIGN_UP (xstate_comp_offsets[i], 64);
+                       }
+                   }
+
+                 /* Use XSAVEC.  The end of the last component is the
+                    total compacted state size; add the integer register
+                    save area and align to 64 bytes as above.  */
+                 unsigned int size
+                   = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+                 if (size)
+                   {
+                     cpu_features->xsave_state_size
+                       = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+                     cpu_features->feature[index_arch_XSAVEC_Usable]
+                       |= bit_arch_XSAVEC_Usable;
+                   }
+               }
+           }
+       }
     }
 }
 
@@ -232,20 +313,6 @@ init_cpu_features (struct cpu_features *cpu_features)
       else
        cpu_features->feature[index_arch_Prefer_No_AVX512]
          |= bit_arch_Prefer_No_AVX512;
-
-      /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
-         If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt.  */
-      cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
-       |= bit_arch_Use_dl_runtime_resolve_slow;
-      if (cpu_features->max_cpuid >= 0xd)
-       {
-         unsigned int eax;
-
-         __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
-         if ((eax & (1 << 2)) != 0)
-           cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
-             |= bit_arch_Use_dl_runtime_resolve_opt;
-       }
     }
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
@@ -310,4 +377,68 @@ no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+#if HAVE_TUNABLES
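+  /* Apply the glibc.tune.hwcaps tunable and read the optional
+     glibc.tune.x86_* overrides for the cache sizes and the non-temporal
+     threshold.  */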
+  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+  cpu_features->non_temporal_threshold
+    = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
+  cpu_features->data_cache_size
+    = TUNABLE_GET (x86_data_cache_size, long int, NULL);
+  cpu_features->shared_cache_size
+    = TUNABLE_GET (x86_shared_cache_size, long int, NULL);
+#endif
+
+  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
+     dl_platform string and dl_hwcap bits select the optional platform
+     and hwcap subdirectories searched by the dynamic loader.  */
+#if !HAVE_TUNABLES && defined SHARED
+  /* When tunables are enabled, dl_hwcap_mask is already initialized from
+     the glibc.tune.hwcap_mask tunable, so this is only needed when
+     tunables are disabled.  */
+  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
+#endif
+
+#ifdef __x86_64__
+  GLRO(dl_hwcap) = HWCAP_X86_64;
+  if (cpu_features->kind == arch_kind_intel)
+    {
+      const char *platform = NULL;
+
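+      /* AVX512F+CD+ER+PF identifies Xeon Phi (Knights Landing), while
+         AVX512F+CD+BW+DQ+VL enables the HWCAP_X86_AVX512_1 hwcap.  */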
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+         && CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+       {
+         if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+           {
+             if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+               platform = "xeon_phi";
+           }
+         else
+           {
+             if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)
+                 && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)
+                 && CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+               GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
+           }
+       }
+
+      if (platform == NULL
+         && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+         && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+         && CPU_FEATURES_CPU_P (cpu_features, BMI1)
+         && CPU_FEATURES_CPU_P (cpu_features, BMI2)
+         && CPU_FEATURES_CPU_P (cpu_features, LZCNT)
+         && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
+         && CPU_FEATURES_CPU_P (cpu_features, POPCNT))
+       platform = "haswell";
+
+      if (platform != NULL)
+       GLRO(dl_platform) = platform;
+    }
+#else
+  GLRO(dl_hwcap) = 0;
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE2))
+    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
+    GLRO(dl_platform) = "i686";
+  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
+    GLRO(dl_platform) = "i586";
+#endif
 }