From: Cameron Cawley Date: Sun, 25 Sep 2022 19:11:14 +0000 (+0100) Subject: Check that the OS supports saving the YMM registers before enabling AVX2 X-Git-Tag: 2.1.0-beta1~153 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2fb28557351b3e394dce0be44b364ca95d7e47dc;p=thirdparty%2Fzlib-ng.git Check that the OS supports saving the YMM registers before enabling AVX2 --- diff --git a/arch/x86/x86_features.c b/arch/x86/x86_features.c index 72ef885e8..b8ab69cef 100644 --- a/arch/x86/x86_features.c +++ b/arch/x86/x86_features.c @@ -28,6 +28,8 @@ Z_INTERNAL int x86_cpu_has_sse42; Z_INTERNAL int x86_cpu_has_pclmulqdq; Z_INTERNAL int x86_cpu_has_vpclmulqdq; Z_INTERNAL int x86_cpu_has_tzcnt; +Z_INTERNAL int x86_cpu_has_os_save_ymm; +Z_INTERNAL int x86_cpu_has_os_save_zmm; static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { #ifdef _MSC_VER @@ -57,6 +59,16 @@ static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigne #endif } +static uint64_t xgetbv(unsigned int xcr) { +#ifdef _MSC_VER + return _xgetbv(xcr); +#else + uint32_t eax, edx; + __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); + return (uint64_t)(edx) << 32 | eax; +#endif +} + void Z_INTERNAL x86_check_features(void) { unsigned eax, ebx, ecx, edx; unsigned maxbasic; @@ -70,20 +82,44 @@ void Z_INTERNAL x86_check_features(void) { x86_cpu_has_sse42 = ecx & 0x100000; x86_cpu_has_pclmulqdq = ecx & 0x2; + if (ecx & 0x08000000) { + uint64_t xfeature = xgetbv(0); + + x86_cpu_has_os_save_ymm = ((xfeature & 0x06) == 0x06); + x86_cpu_has_os_save_zmm = ((xfeature & 0xe6) == 0xe6); + } else { + x86_cpu_has_os_save_ymm = 0; + x86_cpu_has_os_save_zmm = 0; + } + if (maxbasic >= 7) { cpuidex(7, 0, &eax, &ebx, &ecx, &edx); // check BMI1 bit // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf x86_cpu_has_tzcnt = ebx & 0x8; - // check AVX2 bit - x86_cpu_has_avx2 = ebx & 0x20; - x86_cpu_has_avx512 = ebx & 0x00010000; - x86_cpu_has_avx512vnni = ecx & 0x800; x86_cpu_has_vpclmulqdq = ecx & 0x400; + + // check AVX2 bit if the OS supports saving YMM registers + if (x86_cpu_has_os_save_ymm) { + x86_cpu_has_avx2 = ebx & 0x20; + } else { + x86_cpu_has_avx2 = 0; + } + + // check AVX512 bits if the OS supports saving ZMM registers + if (x86_cpu_has_os_save_zmm) { + x86_cpu_has_avx512 = ebx & 0x00010000; + x86_cpu_has_avx512vnni = ecx & 0x800; + } else { + x86_cpu_has_avx512 = 0; + x86_cpu_has_avx512vnni = 0; + } } else { x86_cpu_has_tzcnt = 0; x86_cpu_has_avx2 = 0; + x86_cpu_has_avx512 = 0; + x86_cpu_has_avx512vnni = 0; x86_cpu_has_vpclmulqdq = 0; } } diff --git a/arch/x86/x86_features.h b/arch/x86/x86_features.h index 97630ab6e..3e0d17f06 100644 --- a/arch/x86/x86_features.h +++ b/arch/x86/x86_features.h @@ -16,6 +16,8 @@ extern int x86_cpu_has_sse42; extern int x86_cpu_has_pclmulqdq; extern int x86_cpu_has_vpclmulqdq; extern int x86_cpu_has_tzcnt; +extern int x86_cpu_has_os_save_ymm; +extern int x86_cpu_has_os_save_zmm; void Z_INTERNAL x86_check_features(void);