git.ipfire.org Git - thirdparty/glibc.git/commitdiff
x86-64: Remove Prefer_AVX2_STRCMP
author H.J. Lu <hjl.tools@gmail.com>
Fri, 29 Oct 2021 19:56:53 +0000 (12:56 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Mon, 1 Nov 2021 14:53:04 +0000 (07:53 -0700)
Remove Prefer_AVX2_STRCMP to enable EVEX strcmp.  When comparing two 32-byte
strings, EVEX strcmp has been improved to require 1 load, 1 VPTESTM, 1
VPCMP, 1 KMOVD and 1 INCL instead of 2 loads, 3 VPCMPs, 2 KORDs, 1 KMOVD
and 1 TESTL, while AVX2 strcmp requires 1 load, 2 VPCMPEQs, 1 VPMINU, 1
VPMOVMSKB and 1 TESTL.  EVEX strcmp is now faster than AVX2 strcmp by up
to 40% on Tiger Lake and Ice Lake.

sysdeps/x86/cpu-features.c
sysdeps/x86/cpu-tunables.c
sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
sysdeps/x86_64/multiarch/strcmp.c
sysdeps/x86_64/multiarch/strncmp.c

index 645bba63147f6589eb6ae716b7293f42f7e41d9a..be2498b2e7cb6216f0926c3db1912d247e617ff9 100644 (file)
@@ -546,14 +546,6 @@ init_cpu_features (struct cpu_features *cpu_features)
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
-
-         /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
-            requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
-            requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
-            AVX2 strcmp is faster than EVEX strcmp.  */
-         if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
-           cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
-             |= bit_arch_Prefer_AVX2_STRCMP;
        }
 
       /* Avoid avoid short distance REP MOVSB on processor with FSRM.  */
index 00fe5045eb56eb076d08f55068692bcf1a1cd2c7..61b05e5b1d56179470d03db8933e5d48257867fe 100644 (file)
@@ -239,8 +239,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
              CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
                                                Fast_Copy_Backward,
                                                disable, 18);
-             CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
-               (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
            }
          break;
        case 19:
index d7c93f00c5928a30bb2a63be73b1cdc5c74a9ea3..1530d594b3a0c88e4a0ab01f8af079d21686f37a 100644 (file)
@@ -32,5 +32,4 @@ BIT (Prefer_ERMS)
 BIT (Prefer_No_AVX512)
 BIT (MathVec_Prefer_No_AVX512)
 BIT (Prefer_FSRM)
-BIT (Prefer_AVX2_STRCMP)
 BIT (Avoid_Short_Distance_REP_MOVSB)
index 62b7abeeee646ab472b71b98020d9e1909dd2044..7c2901bf444562596d7ba9c785adf05925356b04 100644 (file)
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-         && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+         && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
        return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
index 60ba0fe356b31779d5299b67707877bdec92d20c..f94a421784bfe9235d7cc57ce76df7c058fe202d 100644 (file)
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-         && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
-         && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+         && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
        return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))