Extension of OPENSSL_ia32cap to accommodate additional CPUID bits

author Elizarova, Alina <alina.elizarova@intel.com>

Wed, 4 Dec 2024 18:29:23 +0000 (10:29 -0800)

committer Tomas Mraz <tomas@openssl.org>

Fri, 13 Dec 2024 13:51:22 +0000 (14:51 +0100)
author Elizarova, Alina <alina.elizarova@intel.com>
Wed, 4 Dec 2024 18:29:23 +0000 (10:29 -0800)
committer Tomas Mraz <tomas@openssl.org>
Fri, 13 Dec 2024 13:51:22 +0000 (14:51 +0100)
diff --git a/CHANGES.md b/CHANGES.md

index 9a103d9f590d1930fb22966d1642e57b57024c24..1e3a5b53ad88e3a0103c13706ee4b979cbba16b8 100644 (file)
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -78,6 +78,12 @@ OpenSSL 3.5
  
     *Paul Dale*
  
+ * Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID`
+   feature/capability bits in leaf `0x7` (Extended Feature Flags) as well
+   as leaf `0x24` (Converged Vector ISA).
+
+   *Dan Zimmerman, Alina Elizarova*
+
  OpenSSL 3.4
  -----------
  
diff --git a/crypto/cpuid.c b/crypto/cpuid.c

index 51cbe5ea090eebc5830e3b5b793a8caba2f91afe..538a5a039f20da0ab4387fe3360ce59119b230e8 100644 (file)
--- a/crypto/cpuid.c
+++ b/crypto/cpuid.c
@@ -14,7 +14,7 @@
          defined(__x86_64) || defined(__x86_64__) || \
          defined(_M_AMD64) || defined(_M_X64)
  
-extern unsigned int OPENSSL_ia32cap_P[4];
+extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
  
  # if defined(OPENSSL_CPUID_OBJ)
  
@@ -29,7 +29,7 @@ extern unsigned int OPENSSL_ia32cap_P[4];
   */
  #  ifdef _WIN32
  typedef WCHAR variant_char;
-
+#   define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256
  static variant_char *ossl_getenv(const char *name)
  {
      /*
@@ -37,10 +37,10 @@ static variant_char *ossl_getenv(const char *name)
       * just ignore |name| and use equivalent wide-char L-literal.
       * As well as to ignore excessively long values...
       */
-    static WCHAR value[48];
-    DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48);
+    static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE];
+    DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE);
  
-    return (len > 0 && len < 48) ? value : NULL;
+    return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL;
  }
  #  else
  typedef char variant_char;
@@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void)
      IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
      IA32CAP vec;
      const variant_char *env;
+    int index = 2;
  
      if (trigger)
          return;
@@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void)
              vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
          }
  
-        if ((env = ossl_strchr(env, ':')) != NULL) {
-            IA32CAP vecx;
-
+        /* Processed indexes 0, 1 */
+        if ((env = ossl_strchr(env, ':')) != NULL)
              env++;
-            off = (env[0] == '~') ? 1 : 0;
-            vecx = ossl_strtouint64(env + off);
-            if (off) {
-                OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx;
-                OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32);
-            } else {
-                OPENSSL_ia32cap_P[2] = (unsigned int)vecx;
-                OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32);
+        for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) {
+            if ((env != NULL) && (env[0] != '\0')) {
+                /* if env[0] == ':' current index is skipped */
+                if (env[0] != ':') {
+                    IA32CAP vecx;
+
+                    off = (env[0] == '~') ? 1 : 0;
+                    vecx = ossl_strtouint64(env + off);
+                    if (off) {
+                        OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx;
+                        OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32);
+                    } else {
+                        OPENSSL_ia32cap_P[index] = (unsigned int)vecx;
+                        OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32);
+                    }
+                }
+                /* skip delimiter */
+                if ((env = ossl_strchr(env, ':')) != NULL)
+                    env++;
+            } else { /* zeroize the next two indexes */
+                OPENSSL_ia32cap_P[index] = 0;
+                OPENSSL_ia32cap_P[index + 1] = 0;
              }
-        } else {
-            OPENSSL_ia32cap_P[2] = 0;
-            OPENSSL_ia32cap_P[3] = 0;
          }
+
+        /* If AVX10 is disabled, zero out its detailed cap bits */
+        if (!(OPENSSL_ia32cap_P[6] & (1 << 19)))
+            OPENSSL_ia32cap_P[9] = 0;
      } else {
          vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
      }
@@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void)
      OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
  }
  # else
-unsigned int OPENSSL_ia32cap_P[4];
+unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
  # endif
  #endif
  
diff --git a/crypto/info.c b/crypto/info.c

index ad31c9ec31fe23377c79e1ab53f8236789de38e7..4d70471be255c282352382a86a9554ac85b418c9 100644 (file)
--- a/crypto/info.c
+++ b/crypto/info.c
@@ -30,7 +30,7 @@
  # include "crypto/riscv_arch.h"
  # define CPU_INFO_STR_LEN 2048
  #else
-# define CPU_INFO_STR_LEN 128
+# define CPU_INFO_STR_LEN 256
  #endif
  
  /* extern declaration to avoid warning */
@@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings)
      const char *env;
  
      BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
-                 CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx",
+                 CPUINFO_PREFIX "OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx",
                   (unsigned long long)OPENSSL_ia32cap_P[0] |
                   (unsigned long long)OPENSSL_ia32cap_P[1] << 32,
                   (unsigned long long)OPENSSL_ia32cap_P[2] |
-                 (unsigned long long)OPENSSL_ia32cap_P[3] << 32);
+                 (unsigned long long)OPENSSL_ia32cap_P[3] << 32,
+                 (unsigned long long)OPENSSL_ia32cap_P[4] |
+                 (unsigned long long)OPENSSL_ia32cap_P[5] << 32,
+                 (unsigned long long)OPENSSL_ia32cap_P[6] |
+                 (unsigned long long)OPENSSL_ia32cap_P[7] << 32,
+                 (unsigned long long)OPENSSL_ia32cap_P[8] |
+                 (unsigned long long)OPENSSL_ia32cap_P[9] << 32);
+
      if ((env = getenv("OPENSSL_ia32cap")) != NULL)
          BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
                       sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),
diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl

index 1b2b27c02286ae5e55c32ebaa5fc3735aadb001d..f3c01ea89b4b4849a107c8c12890147a10ad94fd 100644 (file)
--- a/crypto/perlasm/x86gas.pl
+++ b/crypto/perlasm/x86gas.pl
@@ -167,7 +167,8 @@ sub ::file_end
         }
      }
      if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
-       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
+    # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
+       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40";
         if ($::macosx)  { push (@out,"$tmp,2\n"); }
         elsif ($::elf)  { push (@out,"$tmp,4\n"); }
         else            { push (@out,"$tmp\n"); }
diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl

index 2dcd3f79f6ed23fcd2f52fd0f987f0f4fa2a7c96..ccdba757dc28e39650f373921a7fbd0e18a42076 100644 (file)
--- a/crypto/perlasm/x86masm.pl
+++ b/crypto/perlasm/x86masm.pl
@@ -139,9 +139,10 @@ ___
      push(@out,"$segment        ENDS\n");
  
      if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
+    # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
      {  my $comm=<<___;
  .bss   SEGMENT 'BSS'
-COMM   ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
+COMM   ${nmdecor}OPENSSL_ia32cap_P:DWORD:10
  .bss   ENDS
  ___
         # comment out OPENSSL_ia32cap_P declarations
diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl

index 7017b88e80a3951ff025622a68ce9b9bfff188ef..a8cdd2d0bbe5421ec26a600b775beb189dfbe60c 100644 (file)
--- a/crypto/perlasm/x86nasm.pl
+++ b/crypto/perlasm/x86nasm.pl
@@ -124,9 +124,10 @@ sub ::function_end_B
  
  sub ::file_end
  {   if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
+    # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
      {  my $comm=<<___;
  ${drdecor}segment      .bss
-${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 16
+${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 40
  ___
         # comment out OPENSSL_ia32cap_P declarations
         grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl

index 53685ec26390d2a7393c142df03419ae5f2a5c8a..f0eb8510ed2e916b1190b08dbb8afdcb9cc3cfe1 100644 (file)
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -27,14 +27,14 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
                                  ("%rdi","%rsi","%rdx","%rcx"); # Unix order
  
  print<<___;
+#include crypto/cryptlib.h
  .extern                OPENSSL_cpuid_setup
  .hidden                OPENSSL_cpuid_setup
  .section       .init
         call    OPENSSL_cpuid_setup
  
  .hidden        OPENSSL_ia32cap_P
-.comm  OPENSSL_ia32cap_P,16,4
-
+.comm  OPENSSL_ia32cap_P,40,4  # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
  .text
  
  .globl OPENSSL_atomic_add
@@ -192,6 +192,7 @@ OPENSSL_ia32_cpuid:
         mov     \$7,%eax
         xor     %ecx,%ecx
         cpuid
+       movd    %eax,%xmm1              # put aside leaf 07H Max Sub-leaves
         bt      \$26,%r9d               # check XSAVE bit, cleared on Knights
         jc      .Lnotknights
         and     \$0xfff7ffff,%ebx       # clear ADCX/ADOX flag
@@ -202,9 +203,31 @@ OPENSSL_ia32_cpuid:
         jne     .Lnotskylakex
         and     \$0xfffeffff,%ebx       # ~(1<<16)
                                         # suppress AVX512F flag on Skylake-X
-.Lnotskylakex:
-       mov     %ebx,8(%rdi)            # save extended feature flags
-       mov     %ecx,12(%rdi)
+
+.Lnotskylakex:         # save extended feature flags
+       mov     %ebx,8(%rdi)            # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
+       mov     %ecx,12(%rdi)           # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
+       mov     %edx,16(%rdi)           # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
+
+       movd    %xmm1,%eax              # Restore leaf 07H Max Sub-leaves
+       cmp     \$0x1,%eax              # Do we have cpuid(EAX=0x7, ECX=0x1)?
+       jb .Lno_extended_info
+       mov     \$0x7,%eax
+       mov \$0x1,%ecx
+       cpuid           # cpuid(EAX=0x7, ECX=0x1)
+       mov     %eax,20(%rdi)           # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
+       mov     %edx,24(%rdi)           # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
+       mov     %ebx,28(%rdi)           # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
+       mov     %ecx,32(%rdi)           # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
+
+       and \$0x80000,%edx              # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
+       cmp \$0x0,%edx
+       je .Lno_extended_info
+       mov     \$0x24,%eax             # Have AVX10 Support, query for details
+       mov \$0x0,%ecx
+       cpuid           # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
+       mov     %ebx,36(%rdi)           # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
+
  .Lno_extended_info:
  
         bt      \$27,%r9d               # check OSXSAVE bit
@@ -223,6 +246,9 @@ OPENSSL_ia32_cpuid:
         cmp     \$6,%eax
         je      .Ldone
  .Lclear_avx:
+       andl    \$0xff7fffff,20(%rdi)   # ~(1<<23)
+                                                                       # clear AVXIFMA, which is VEX-encoded
+                                                                       # and requires YMM state support
         mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
         and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
         mov     \$0x3fdeffdf,%eax       # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl

index a7bcb27e262d6a2933d4a5fe18fded409afc2fa5..35e2c5b0a5404b76123e8b1349ed27f9ef51d432 100644 (file)
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -137,7 +137,28 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
         &mov    ("eax",7);
         &xor    ("ecx","ecx");
         &cpuid  ();
-       &mov    (&DWP(8,"edi"),"ebx");  # save extended feature flag
+       &mov    (&DWP(8,"edi"),"ebx");  # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
+       &mov    (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
+       &mov    (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
+       &cmp    ("eax",1);                              # Do we have cpuid(EAX=0x7, ECX=0x1)?
+       &jb     (&label("no_extended_info"));
+       &mov    ("eax",7);
+       &mov    ("ecx",1);
+       &cpuid  ();                                             # cpuid(EAX=0x7, ECX=0x1)
+       &mov    (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
+       &mov    (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
+       &mov    (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
+       &mov    (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
+
+       &and    ("edx",0x80000);                # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
+       &cmp    ("edx",0x0);
+       &je (&label("no_extended_info"));
+
+       &mov    ("eax",0x24);                   # Have AVX10 Support, query for details
+       &mov    ("ecx",0x0);
+       &cpuid  ();                                             # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
+       &mov    (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
+
  &set_label("no_extended_info");
  
         &bt     ("ebp",27);             # check OSXSAVE bit
@@ -154,6 +175,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
         &and    ("esi",0xfeffffff);     # clear FXSR
  &set_label("clear_avx");
         &and    ("ebp",0xefffe7ff);     # clear AVX, FMA and AMD XOP bits
+       &and    (&DWP(20,"edi"),0xff7fffff);    # ~(1<<23) clear AVXIFMA,
+                                                                                       # which is VEX-encoded
+                                                                                       # and requires YMM state support
         &and    (&DWP(8,"edi"),0xffffffdf);     # clear AVX2
  &set_label("done");
         &mov    ("eax","esi");
diff --git a/doc/man3/OPENSSL_ia32cap.pod b/doc/man3/OPENSSL_ia32cap.pod

index c6c1c0185ad87ab0e5068e0eb31347c007cac889..2b0dc93d494c6d9cc1bce96e2f489eda0f0a704e 100644 (file)
--- a/doc/man3/OPENSSL_ia32cap.pod
+++ b/doc/man3/OPENSSL_ia32cap.pod
@@ -10,81 +10,77 @@ OPENSSL_ia32cap - the x86[_64] processor capabilities vector
  
  =head1 DESCRIPTION
  
-OpenSSL supports a range of x86[_64] instruction set extensions. These
-extensions are denoted by individual bits in capability vector returned
-by processor in EDX:ECX register pair after executing CPUID instruction
-with EAX=1 input value (see Intel Application Note #241618). This vector
-is copied to memory upon toolkit initialization and used to choose
-between different code paths to provide optimal performance across wide
-range of processors. For the moment of this writing following bits are
-significant:
+OpenSSL supports a range of x86[_64] instruction set extensions and
+features. These extensions are denoted by individual bits or groups of bits
+stored internally as ten 32-bit capability vectors and for simplicity
+represented logically below as five 64-bit vectors. This logical
+vector (LV) representation is used to streamline the definition of the
+OPENSSL_ia32cap environment variable.
+
+Upon toolkit initialization, the capability vectors are populated through
+successive executions of the CPUID instruction, after which any OPENSSL_ia32cap
+environment variable capability bit modifications are applied. After toolkit
+initialization is complete, populated vectors are then used to choose
+between different code paths to provide optimal performance across a wide
+range of x86[_64] based processors.
+
+Further CPUID information can be found in the Intel(R) Architecture
+Instruction Set Extensions Programming Reference, and the AMD64 Architecture
+Programmer's Manual (Volume 3).
+
+=head2 Notable Capability Bits for LV0
+
+The following are notable capability bits from logical vector 0 (LV0)
+resulting from the following execution of CPUID.(EAX=01H).EDX and
+CPUID.(EAX=01H).ECX:
  
  =over 4
  
-=item bit #4 denoting presence of Time-Stamp Counter.
+=item bit #0+4 denoting presence of Time-Stamp Counter;
  
-=item bit #19 denoting availability of CLFLUSH instruction;
+=item bit #0+19 denoting availability of CLFLUSH instruction;
  
-=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
+=item bit #0+20, reserved by Intel, is used to choose among RC4 code paths;
  
-=item bit #23 denoting MMX support;
+=item bit #0+23 denoting MMX support;
  
-=item bit #24, FXSR bit, denoting availability of XMM registers;
+=item bit #0+24, FXSR bit, denoting availability of XMM registers;
  
-=item bit #25 denoting SSE support;
+=item bit #0+25 denoting SSE support;
  
-=item bit #26 denoting SSE2 support;
+=item bit #0+26 denoting SSE2 support;
  
-=item bit #28 denoting Hyperthreading, which is used to distinguish
+=item bit #0+28 denoting Hyperthreading, which is used to distinguish
  cores with shared cache;
  
-=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
+=item bit #0+30, reserved by Intel, denotes specifically Intel CPUs;
  
-=item bit #33 denoting availability of PCLMULQDQ instruction;
+=item bit #0+33 denoting availability of PCLMULQDQ instruction;
  
-=item bit #41 denoting SSSE3, Supplemental SSE3, support;
+=item bit #0+41 denoting SSSE3, Supplemental SSE3, support;
  
-=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
+=item bit #0+43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
  
-=item bit #54 denoting availability of MOVBE instruction;
+=item bit #0+54 denoting availability of MOVBE instruction;
  
-=item bit #57 denoting AES-NI instruction set extension;
+=item bit #0+57 denoting AES-NI instruction set extension;
  
-=item bit #58, XSAVE bit, lack of which in combination with MOVBE is used
+=item bit #0+58, XSAVE bit, lack of which in combination with MOVBE is used
  to identify Atom Silvermont core;
  
-=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
+=item bit #0+59, OSXSAVE bit, denoting availability of YMM registers;
  
-=item bit #60 denoting AVX extension;
+=item bit #0+60 denoting AVX extension;
  
-=item bit #62 denoting availability of RDRAND instruction;
+=item bit #0+62 denoting availability of RDRAND instruction;
  
  =back
  
-For example, in 32-bit application context clearing bit #26 at run-time
-disables high-performance SSE2 code present in the crypto library, while
-clearing bit #24 disables SSE2 code operating on 128-bit XMM register
-bank. You might have to do the latter if target OpenSSL application is
-executed on SSE2 capable CPU, but under control of OS that does not
-enable XMM registers. Historically address of the capability vector copy
-was exposed to application through OPENSSL_ia32cap_loc(), but not
-anymore. Now the only way to affect the capability detection is to set
-B<OPENSSL_ia32cap> environment variable prior target application start. To
-give a specific example, on Intel P4 processor
-C<env OPENSSL_ia32cap=0x16980010 apps/openssl>, or better yet
-C<env OPENSSL_ia32cap=~0x1000000 apps/openssl> would achieve the desired
-effect. Alternatively you can reconfigure the toolkit with no-sse2
-option and recompile.
-
-Less intuitive is clearing bit #28, or ~0x10000000 in the "environment
-variable" terms. The truth is that it's not copied from CPUID output
-verbatim, but is adjusted to reflect whether or not the data cache is
-actually shared between logical cores. This in turn affects the decision
-on whether or not expensive countermeasures against cache-timing attacks
-are applied, most notably in AES assembler module.
+=head2 Notable Capability Bits for LV1
  
-The capability vector is further extended with EBX value returned by
-CPUID with EAX=7 and ECX=0 as input. Following bits are significant:
+The following are notable capability bits from logical vector 1 (LV1)
+resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EBX and
+CPUID.(EAX=07H,ECX=0H).ECX:
  
  =over 4
  
@@ -103,8 +99,7 @@ and RORX;
  
  =item bit #64+19 denoting availability of ADCX and ADOX instructions;
  
-=item bit #64+21 denoting availability of VPMADD52[LH]UQ instructions,
-aka AVX512IFMA extension;
+=item bit #64+21 denoting availability of AVX512IFMA extension;
  
  =item bit #64+29 denoting availability of SHA extension;
  
@@ -118,10 +113,109 @@ aka AVX512IFMA extension;
  
  =back
  
-To control this extended capability word use C<:> as delimiter when
-setting up B<OPENSSL_ia32cap> environment variable. For example assigning
-C<:~0x20> would disable AVX2 code paths, and C<:0> - all post-AVX
-extensions.
+=head2 Notable Capability Bits for LV2
+
+The following are notable capability bits from logical vector 2 (LV2)
+resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EDX and
+CPUID.(EAX=07H,ECX=1H).EAX:
+
+=over 4
+
+=item bit #128+15 denoting availability of Hybrid CPU;
+
+=item bit #128+29 denoting support for IA32_ARCH_CAPABILITIES MSR;
+
+=item bit #128+32 denoting availability of SHA512 extension;
+
+=item bit #128+33 denoting availability of SM3 extension;
+
+=item bit #128+34 denoting availability of SM4 extension;
+
+=item bit #128+55 denoting availability of AVX-IFMA extension;
+
+=back
+
+=head2 Notable Capability Bits for LV3
+
+The following are notable capability bits from logical vector 3 (LV3)
+resulting from the following execution of CPUID.(EAX=07H,ECX=1H).EDX and
+CPUID.(EAX=07H,ECX=1H).EBX:
+
+=over 4
+
+=item bit #192+19 denoting availability of AVX10 Converged Vector ISA extension;
+
+=item bit #192+21 denoting availability of APX_F extension;
+
+=back
+
+=head2 Notable Capability Bits for LV4
+
+The following are notable capability bits from logical vector 4 (LV4)
+resulting from the following execution of CPUID.(EAX=07H,ECX=1H).ECX and
+CPUID.(EAX=24H,ECX=0H).EBX:
+
+=over 4
+
+=item bits #256+32+[0:7] denoting AVX10 Converged Vector ISA Version (8 bits);
+
+=item bit #256+48 denoting AVX10 XMM support;
+
+=item bit #256+49 denoting AVX10 YMM support;
+
+=item bit #256+50 denoting AVX10 ZMM support;
+
+=back
+
+=head2 OPENSSL_ia32cap environment variable
+
+The B<OPENSSL_ia32cap> environment variable provides a mechanism to override
+the default capability vector values at library initialization time.
+The variable consists of a series of 64-bit numbers representing each
+of the logical vectors (LV) described above. Each value is delimited by a 'B<:>'.
+Decimal, octal, and hexadecimal representations are supported.
+
+C<env OPENSSL_ia32cap=LV0:LV1:LV2:LV3:LV4>
+
+Used in this form, each non-null logical vector will I<overwrite> the entire corresponding
+capability vector pair with the provided value. To keep compatibility with the
+behaviour of the original OPENSSL_ia32cap environment variable
+C<env OPENSSL_ia32cap=LV0:LV1>, any omitted trailing capability vector pairs are set to zero.
+
+To illustrate, the following will zero all capability bits in logical vectors 1 and further
+(disable all post-AVX extensions):
+
+C<env OPENSSL_ia32cap=:0>
+
+The following will zero all capability bits in logical vectors 2 and further:
+
+C<env OPENSSL_ia32cap=::0>
+
+The following will zero all capability bits only in logical vector 1:
+
+C<env OPENSSL_ia32cap=:0::::>
+
+A more likely usage scenario would be to disable specific instruction set extensions.
+The 'B<~>' character is used to specify a bit mask of the extensions to be disabled for
+a particular logical vector.
+
+To illustrate, the following will disable AVX2 code paths and further extensions:
+
+C<env OPENSSL_ia32cap=:~0x20>
+
+The following will disable AESNI (LV0 bit 57) and VAES (LV1 bit 41)
+extensions and therefore any code paths using those extensions but leave
+the rest of the logical vectors unchanged:
+
+C<env OPENSSL_ia32cap=~0x200000000000000:~0x20000000000:~0x0:~0x0:~0x0>
+
+=head1 NOTES
+
+Not all capability bits are copied from CPUID output verbatim. An example
+of this is the somewhat less intuitive clearing of LV0 bit #28, or ~0x10000000
+in "environment variable" terms. This bit is adjusted to reflect whether or
+not the data cache is actually shared between logical cores. This in turn affects
+the decision on whether or not expensive countermeasures against cache-timing attacks
+are applied, most notably in the AES assembler module.
  
  =head1 RETURN VALUES
  
diff --git a/include/internal/cryptlib.h b/include/internal/cryptlib.h

index 3227f9fcf9bed2094f3a8c72268f5f7bf589633d..da442f8a86f7a78789a10e2f72217f784efce316 100644 (file)
--- a/include/internal/cryptlib.h
+++ b/include/internal/cryptlib.h
@@ -36,8 +36,10 @@ void OPENSSL_cpuid_setup(void);
  #if defined(__i386)   || defined(__i386__)   || defined(_M_IX86) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_AMD64) || defined(_M_X64)
+#  define OPENSSL_IA32CAP_P_MAX_INDEXES 10
  extern unsigned int OPENSSL_ia32cap_P[];
  #endif
+
  void OPENSSL_showfatal(const char *fmta, ...);
  int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx);
  void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);
author	Elizarova, Alina <alina.elizarova@intel.com>
	Wed, 4 Dec 2024 18:29:23 +0000 (10:29 -0800)
committer	Tomas Mraz <tomas@openssl.org>
	Fri, 13 Dec 2024 13:51:22 +0000 (14:51 +0100)
CHANGES.md		patch \| blob \| blame \| history
crypto/cpuid.c		patch \| blob \| blame \| history
crypto/info.c		patch \| blob \| blame \| history
crypto/perlasm/x86gas.pl		patch \| blob \| blame \| history
crypto/perlasm/x86masm.pl		patch \| blob \| blame \| history
crypto/perlasm/x86nasm.pl		patch \| blob \| blame \| history
crypto/x86_64cpuid.pl		patch \| blob \| blame \| history
crypto/x86cpuid.pl		patch \| blob \| blame \| history
doc/man3/OPENSSL_ia32cap.pod		patch \| blob \| blame \| history
include/internal/cryptlib.h		patch \| blob \| blame \| history