x86: Set preferred CPU features on the KH-40000 and KX-7000 Zhaoxin processors
author MayShao-oc <MayShao-oc@zhaoxin.com>
Sat, 29 Jun 2024 03:58:26 +0000 (11:58 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Sun, 30 Jun 2024 13:26:43 +0000 (06:26 -0700)
Fix code formatting under the Zhaoxin branch and add comments for
different Zhaoxin models.

Unaligned AVX loads are slower on KH-40000 and KX-7000, so disable
AVX_Fast_Unaligned_Load.

Enable the Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to
use the sse2_unaligned versions of memset, strcpy, and strcat.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
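
For context on the mechanism this patch tweaks: each cpu_features->preferred[]
entry is a word of bit flags that later steers which string/memory routine
variants glibc selects.  The stand-alone C sketch below mirrors the set/clear
pattern applied here for KH-40000 and KX-7000; the names (struct my_features,
BIT_*) are made up for illustration and are not glibc's
index_arch_*/bit_arch_* identifiers.

#include <stdio.h>

#define BIT_AVX_FAST_UNALIGNED_LOAD  (1u << 0)
#define BIT_PREFER_NO_VZEROUPPER     (1u << 1)
#define BIT_FAST_UNALIGNED_LOAD      (1u << 2)

struct my_features
{
  /* Stand-in for cpu_features->preferred[]: one word of tuning bits.  */
  unsigned int preferred[1];
};

int
main (void)
{
  struct my_features f = { { BIT_AVX_FAST_UNALIGNED_LOAD } };

  /* Unaligned AVX loads are slow on these parts: clear the preference.  */
  f.preferred[0] &= ~BIT_AVX_FAST_UNALIGNED_LOAD;

  /* Prefer the SSE2 unaligned memset/strcpy/strcat variants instead.  */
  f.preferred[0] |= (BIT_PREFER_NO_VZEROUPPER | BIT_FAST_UNALIGNED_LOAD);

  /* A dispatcher later tests the flags, for example:  */
  if (f.preferred[0] & BIT_FAST_UNALIGNED_LOAD)
    puts ("fast unaligned loads preferred");

  return 0;
}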
sysdeps/x86/cpu-features.c

index 3d7c2819d7cc6643ce61670474785aea7c0c131f..1927f65699957f6762a44e4cfe8c5b64f771a103 100644
@@ -1023,39 +1023,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 
       model += extended_model;
       if (family == 0x6)
-        {
-          if (model == 0xf || model == 0x19)
-            {
+       {
+         /* Tuning for older Zhaoxin processors.  */
+         if (model == 0xf || model == 0x19)
+           {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);
 
-              cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+             cpu_features->preferred[index_arch_Slow_SSE4_2]
+                 |= bit_arch_Slow_SSE4_2;
 
+             /*  Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-               &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-        }
+                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+           }
+       }
       else if (family == 0x7)
-        {
-         if (model == 0x1b)
+       {
+         switch (model)
            {
+             /* Wudaokou microarch tuning.  */
+           case 0x1b:
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);
 
              cpu_features->preferred[index_arch_Slow_SSE4_2]
-               |= bit_arch_Slow_SSE4_2;
+                 |= bit_arch_Slow_SSE4_2;
 
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-               &= ~bit_arch_AVX_Fast_Unaligned_Load;
-           }
-         else if (model == 0x3b)
-           {
+                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+             break;
+
+             /* Lujiazui microarch tuning.  */
+           case 0x3b:
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);
 
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-               &= ~bit_arch_AVX_Fast_Unaligned_Load;
+                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+             break;
+
+             /* Yongfeng and Shijidadao microarch tuning.  */
+           case 0x5b:
+           case 0x6b:
+             cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+             /* To use sse2_unaligned versions of memset, strcpy and strcat.
+              */
+             cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+                 |= (bit_arch_Prefer_No_VZEROUPPER
+                     | bit_arch_Fast_Unaligned_Load);
+             break;
            }
        }
     }
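
To make the effect of the new 0x5b/0x6b (Yongfeng and Shijidadao) case
concrete, here is a hypothetical, self-contained dispatch sketch; the
function and variant names are invented, and glibc's real selection lives in
its multiarch ifunc code.  With Prefer_No_VZEROUPPER set, the selector skips
the AVX2 variant even when AVX2 is available and falls back to the SSE2
unaligned one, avoiding the VZEROUPPER transition cost.

#include <stdio.h>

#define BIT_PREFER_NO_VZEROUPPER  (1u << 1)

/* Hypothetical selector: name of the memset variant that would be chosen
   for a CPU with the given preferred bits.  */
static const char *
pick_memset_variant (unsigned int preferred_bits, int has_avx2)
{
  if (has_avx2 && !(preferred_bits & BIT_PREFER_NO_VZEROUPPER))
    return "memset_avx2_unaligned";
  /* Prefer_No_VZEROUPPER set (as on Yongfeng/Shijidadao), or no AVX2:
     take the SSE2 unaligned path.  */
  return "memset_sse2_unaligned";
}

int
main (void)
{
  puts (pick_memset_variant (BIT_PREFER_NO_VZEROUPPER, 1)); /* sse2 path  */
  puts (pick_memset_variant (0, 1));                        /* avx2 path  */
  return 0;
}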