From: MayShao-oc
Date: Sat, 29 Jun 2024 03:58:26 +0000 (+0800)
Subject: x86: Set preferred CPU features on the KH-40000 and KX-7000 Zhaoxin processors
X-Git-Tag: glibc-2.40~58
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=44d757eb9f4484dbc3aa32042ab64cdf9374e093;p=thirdparty%2Fglibc.git

x86: Set preferred CPU features on the KH-40000 and KX-7000 Zhaoxin processors

Fix code formatting under the Zhaoxin branch and add comments for
different Zhaoxin models.

Unaligned AVX loads are slower on KH-40000 and KX-7000, so disable the
AVX_Fast_Unaligned_Load.

Enable Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to use
sse2_unaligned version of memset, strcpy and strcat.

Reviewed-by: Noah Goldstein
---

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 3d7c2819d7..1927f65699 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1023,39 +1023,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
       model += extended_model;
       if (family == 0x6)
-        {
-          if (model == 0xf || model == 0x19)
-            {
+        {
+          /* Tuning for older Zhaoxin processors.  */
+          if (model == 0xf || model == 0x19)
+            {
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
-              cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+              cpu_features->preferred[index_arch_Slow_SSE4_2]
+                  |= bit_arch_Slow_SSE4_2;
+              /* Unaligned AVX loads are slower.  */
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-        }
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+            }
+        }
       else if (family == 0x7)
-        {
-          if (model == 0x1b)
+        {
+          switch (model)
             {
+              /* Wudaokou microarch tuning.  */
+            case 0x1b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
               cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+                  |= bit_arch_Slow_SSE4_2;
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-          else if (model == 0x3b)
-            {
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Lujiazui microarch tuning.  */
+            case 0x3b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Yongfeng and Shijidadao microarch tuning.  */
+            case 0x5b:
+            case 0x6b:
+              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+              /* To use sse2_unaligned versions of memset, strcpy and strcat.  */
+              cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+                  |= (bit_arch_Prefer_No_VZEROUPPER
+                      | bit_arch_Fast_Unaligned_Load);
+              break;
+            }
         }
     }