git.ipfire.org Git - thirdparty/glibc.git/commitdiff
x86-64: Remove Prefer_AVX2_STRCMP
author: H.J. Lu <hjl.tools@gmail.com>
Fri, 29 Oct 2021 19:56:53 +0000 (12:56 -0700)
committer: Sunil K Pandey <skpgkp2@gmail.com>
Mon, 2 May 2022 20:01:30 +0000 (13:01 -0700)
Remove Prefer_AVX2_STRCMP to enable EVEX strcmp.  When comparing two
32-byte strings, EVEX strcmp has been improved to require 1 load, 1 VPTESTM,
1 VPCMP, 1 KMOVD and 1 INCL (instead of the previous 2 loads, 3 VPCMPs,
2 KORDs, 1 KMOVD and 1 TESTL), while AVX2 strcmp requires 1 load,
2 VPCMPEQs, 1 VPMINU, 1 VPMOVMSKB and 1 TESTL.  EVEX strcmp is now faster
than AVX2 strcmp by up to 40% on Tiger Lake and Ice Lake.

(cherry picked from commit 14dbbf46a007ae5df36646b51ad0c9e5f5259f30)

sysdeps/x86/cpu-features.c
sysdeps/x86/cpu-tunables.c
sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
sysdeps/x86_64/multiarch/strcmp.c
sysdeps/x86_64/multiarch/strncmp.c

index 76882dc0ed94ba4b9d43d480f5aeb759ec5dca7f..e7dc25aaf534eb89f263b861b770e4f38fc21c14 100644 (file)
@@ -563,14 +563,6 @@ disable_tsx:
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
-
-         /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
-            requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
-            requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
-            AVX2 strcmp is faster than EVEX strcmp.  */
-         if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
-           cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
-             |= bit_arch_Prefer_AVX2_STRCMP;
        }
 
       /* Avoid avoid short distance REP MOVSB on processor with FSRM.  */
index a90df39b7885c8f6c1324eb78ad4919718e58cbb..126896f41b14bb289eee3c2c7f5f3aec7ccce7d0 100644 (file)
@@ -238,8 +238,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
              CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
                                                Fast_Copy_Backward,
                                                disable, 18);
-             CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
-               (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
            }
          break;
        case 19:
index d7c93f00c5928a30bb2a63be73b1cdc5c74a9ea3..1530d594b3a0c88e4a0ab01f8af079d21686f37a 100644 (file)
@@ -32,5 +32,4 @@ BIT (Prefer_ERMS)
 BIT (Prefer_No_AVX512)
 BIT (MathVec_Prefer_No_AVX512)
 BIT (Prefer_FSRM)
-BIT (Prefer_AVX2_STRCMP)
 BIT (Avoid_Short_Distance_REP_MOVSB)
index 62b7abeeee646ab472b71b98020d9e1909dd2044..7c2901bf444562596d7ba9c785adf05925356b04 100644 (file)
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-         && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+         && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
        return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
index 60ba0fe356b31779d5299b67707877bdec92d20c..f94a421784bfe9235d7cc57ce76df7c058fe202d 100644 (file)
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-         && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+         && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
        return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))