Z_INTERNAL int x86_cpu_has_pclmulqdq;
Z_INTERNAL int x86_cpu_has_vpclmulqdq;
Z_INTERNAL int x86_cpu_has_tzcnt;
+Z_INTERNAL int x86_cpu_has_os_save_ymm;
+Z_INTERNAL int x86_cpu_has_os_save_zmm;
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifdef _MSC_VER
#endif
}
+static uint64_t xgetbv(unsigned int xcr) {
+#ifdef _MSC_VER
+ return _xgetbv(xcr);
+#else
+ uint32_t eax, edx;
+ __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (uint64_t)(edx) << 32 | eax;
+#endif
+}
+
void Z_INTERNAL x86_check_features(void) {
unsigned eax, ebx, ecx, edx;
unsigned maxbasic;
x86_cpu_has_sse42 = ecx & 0x100000;
x86_cpu_has_pclmulqdq = ecx & 0x2;
+ if (ecx & 0x08000000) {
+ uint64_t xfeature = xgetbv(0);
+
+ x86_cpu_has_os_save_ymm = ((xfeature & 0x06) == 0x06);
+ x86_cpu_has_os_save_zmm = ((xfeature & 0xe6) == 0xe6);
+ } else {
+ x86_cpu_has_os_save_ymm = 0;
+ x86_cpu_has_os_save_zmm = 0;
+ }
+
if (maxbasic >= 7) {
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
// check BMI1 bit
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
x86_cpu_has_tzcnt = ebx & 0x8;
- // check AVX2 bit
- x86_cpu_has_avx2 = ebx & 0x20;
- x86_cpu_has_avx512 = ebx & 0x00010000;
- x86_cpu_has_avx512vnni = ecx & 0x800;
x86_cpu_has_vpclmulqdq = ecx & 0x400;
+
+ // check AVX2 bit if the OS supports saving YMM registers
+ if (x86_cpu_has_os_save_ymm) {
+ x86_cpu_has_avx2 = ebx & 0x20;
+ } else {
+ x86_cpu_has_avx2 = 0;
+ }
+
+ // check AVX512 bits if the OS supports saving ZMM registers
+ if (x86_cpu_has_os_save_zmm) {
+ x86_cpu_has_avx512 = ebx & 0x00010000;
+ x86_cpu_has_avx512vnni = ecx & 0x800;
+ } else {
+ x86_cpu_has_avx512 = 0;
+ x86_cpu_has_avx512vnni = 0;
+ }
} else {
x86_cpu_has_tzcnt = 0;
x86_cpu_has_avx2 = 0;
+ x86_cpu_has_avx512 = 0;
+ x86_cpu_has_avx512vnni = 0;
x86_cpu_has_vpclmulqdq = 0;
}
}