Zen4 tuning part 2

author Jan Hubicka <jh@suse.cz>

Thu, 22 Dec 2022 09:55:46 +0000 (10:55 +0100)

committer Jan Hubicka <jh@suse.cz>

Thu, 18 Jan 2024 15:07:08 +0000 (16:07 +0100)
author Jan Hubicka <jh@suse.cz>
Thu, 22 Dec 2022 09:55:46 +0000 (10:55 +0100)
committer Jan Hubicka <jh@suse.cz>
Thu, 18 Jan 2024 15:07:08 +0000 (16:07 +0100)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

index d95adaf962b7bbfd6f037a7713d41430e3978a5c..dc8228e8a8766f41723e0bd9bfa06b591c3fd33e 100644 (file)
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -7362,6 +7362,8 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
  
        if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
         move_mode = TImode;
+      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
+       move_mode = OImode;
  
        /* Find the corresponding vector mode with the same size as MOVE_MODE.
          MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c

index 53a8271391585dd13cb24e3a05f56afe49b27694..a5c8f7eb0c5f057ebeff2b569f5641282a12a4b0 100644 (file)
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -3067,6 +3067,8 @@ ix86_option_override_internal (bool main_args_p,
      }
  
    if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS])
+    SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 512);
+  else if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS])
      SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 256);
    else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS])
      SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 128);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 6bddd9caaa743ab1b634da217f6875f936c6380f..2a27c521a28750b90dc29ecd0633553aee38480f 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19652,10 +19652,13 @@ ix86_vec_cost (machine_mode mode, int cost)
  
    if (GET_MODE_BITSIZE (mode) == 128
        && TARGET_SSE_SPLIT_REGS)
-    return cost * 2;
-  if (GET_MODE_BITSIZE (mode) > 128
+    return cost * GET_MODE_BITSIZE (mode) / 64;
+  else if (GET_MODE_BITSIZE (mode) > 128
        && TARGET_AVX256_SPLIT_REGS)
      return cost * GET_MODE_BITSIZE (mode) / 128;
+  else if (GET_MODE_BITSIZE (mode) > 256
+      && TARGET_AVX512_SPLIT_REGS)
+    return cost * GET_MODE_BITSIZE (mode) / 256;
    return cost;
  }
  
@@ -22071,7 +22074,9 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
         return 1;
  
        /* Account for targets that splits wide vectors into multiple parts.  */
-      if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
+      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
+       div = GET_MODE_BITSIZE (mode) / 256;
+      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
         div = GET_MODE_BITSIZE (mode) / 128;
        else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
         div = GET_MODE_BITSIZE (mode) / 64;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

index f75529b9d21bcb7e6cd4f8181e3705aa7d6ab6f6..910994c6dd26099d6e64bc852026406f78830ead 100644 (file)
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -625,6 +625,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
         ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
  #define TARGET_AVX256_SPLIT_REGS \
         ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
+#define TARGET_AVX512_SPLIT_REGS \
+       ix86_tune_features[X86_TUNE_AVX512_SPLIT_REGS]
  #define TARGET_GENERAL_REGS_SSE_SPILL \
         ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
  #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def

index b390b29843ad77c1d06b4837e971555a9faecb8e..e47a33d301d16e961a606b6058075027a20281f3 100644 (file)
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -450,7 +450,12 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER)
  
  /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
     smaller FMA chain.  */
-DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3)
+DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4)
+
+/* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
+   smaller FMA chain.  */
+DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_ZNVER4)
+
  
  /*****************************************************************************/
  /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
@@ -477,7 +482,10 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
  
  /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX
     instructions in the auto-vectorizer.  */
-DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
+DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512 | m_ZNVER4)
+
+/* X86_TUNE_AVX512_SPLIT_REGS: if true, AVX512 ops are split into two AVX256 ops.  */
+DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4)
  
  /*****************************************************************************/
  /* Historical relics: tuning flags that helps a specific old CPU designs     */
author	Jan Hubicka <jh@suse.cz>
	Thu, 22 Dec 2022 09:55:46 +0000 (10:55 +0100)
committer	Jan Hubicka <jh@suse.cz>
	Thu, 18 Jan 2024 15:07:08 +0000 (16:07 +0100)
gcc/config/i386/i386-expand.c		patch \| blob \| blame \| history
gcc/config/i386/i386-options.c		patch \| blob \| blame \| history
gcc/config/i386/i386.c		patch \| blob \| blame \| history
gcc/config/i386/i386.h		patch \| blob \| blame \| history
gcc/config/i386/x86-tune.def		patch \| blob \| blame \| history