git.ipfire.org Git - thirdparty/gnutls.git/commitdiff
accelerated: clear AVX bits if it cannot be queried through XSAVE
author: Daiki Ueno <ueno@gnu.org>
Mon, 15 Aug 2022 00:39:18 +0000 (09:39 +0900)
committer: Daiki Ueno <ueno@gnu.org>
Tue, 23 Aug 2022 07:03:10 +0000 (16:03 +0900)
The algorithm to detect AVX is described in Section 14.3 of the "Intel® 64 and IA-32
Architectures Software Developer’s Manual".

GnuTLS previously only followed that algorithm when registering the
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
that the extension bits are propagated to _gnutls_x86_cpuid_s.

Signed-off-by: Daiki Ueno <ueno@gnu.org>
lib/accelerated/x86/x86-common.c

index 7ddaa594e652e10259459a0dc057e33e7d01c6df..b7a88ddeca7e8849b1d54a2b67e1c8a56df5de38 100644 (file)
@@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
 # define bit_AVX 0x10000000
 #endif
 
+#ifndef bit_AVX2
+# define bit_AVX2 0x00000020 /* bit 5 */
+#endif
+
+#ifndef bit_AVX512F
+# define bit_AVX512F 0x00010000 /* bit 16 */
+#endif
+
+#ifndef bit_AVX512IFMA
+# define bit_AVX512IFMA 0x00200000 /* bit 21 */
+#endif
+
+#ifndef bit_AVX512BW
+# define bit_AVX512BW 0x40000000 /* bit 30 */
+#endif
+
+#ifndef bit_AVX512VL
+# define bit_AVX512VL 0x80000000 /* bit 31 */
+#endif
+
 #ifndef bit_OSXSAVE
 # define bit_OSXSAVE 0x8000000
 #endif
@@ -89,10 +109,6 @@ unsigned int _gnutls_x86_cpuid_s[4];
 # define bit_MOVBE 0x00400000
 #endif
 
-#ifndef OSXSAVE_MASK
-# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
-#endif
-
 #define bit_PADLOCK (0x3 << 6)
 #define bit_PADLOCK_PHE (0x3 << 10)
 #define bit_PADLOCK_PHE_SHA512 (0x3 << 25)
@@ -148,7 +164,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
 {
        uint32_t xcr0;
 
-       if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
+       if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
                return 0;
 
 #if defined(_MSC_VER) && !defined(__clang__)
@@ -190,8 +206,9 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
        }
 
        if (capabilities & INTEL_AVX) {
-               if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
-                       _gnutls_x86_cpuid_s[1] |= bit_AVX|OSXSAVE_MASK;
+               if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
+                   check_4th_gen_intel_features(a[1])) {
+                       _gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
                } else {
                        _gnutls_debug_log
                            ("AVX acceleration requested but not available\n");
@@ -236,10 +253,7 @@ static unsigned check_sha(void)
 #ifdef ASM_X86_64
 static unsigned check_avx_movbe(void)
 {
-       if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
-               return 0;
-
-       return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
+       return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE); /* 1 only when both the AVX and MOVBE bits are set */
 }
 
 static unsigned check_pclmul(void)
@@ -884,6 +898,19 @@ void register_x86_intel_crypto(unsigned capabilities)
        if (capabilities == 0) {
                if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
                        return;
+               if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
+                       _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
+
+                       /* Clear the AVX2 and AVX-512 (F, IFMA, BW, VL)
+                        * bits as well, following what OpenSSL does.
+                        * Should we clear bit_AVX512DQ, bit_AVX512PF,
+                        * bit_AVX512ER, and bit_AVX512CD? */
+                       _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
+                                                   bit_AVX512F|
+                                                   bit_AVX512IFMA|
+                                                   bit_AVX512BW|
+                                                   bit_AVX512VL);
+               }
        } else {
                capabilities_to_intel_cpuid(capabilities);
        }