/* Record number of load/store/gather/scatter, plus FMA/DOT_PROD/SAD
   reductions, in the vectorized body. */
if (where == vect_body && !m_costing_for_scalar)
{
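+ /* TARGET_AVX512_SPLIT_REGS (resp. TARGET_AVX256_SPLIT_REGS) targets
+    split one 512-bit (resp. 256-bit) operation into two halves, so
+    each counted reduction statement issues twice as many uops; scale
+    the counts accordingly. */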
+ int scale = 1;
+ if (vectype
+ && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
+ && TARGET_AVX512_SPLIT_REGS)
+ || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
+ && TARGET_AVX256_SPLIT_REGS)))
+ scale = 2;
+
switch (kind)
{
/* Emulated gather/scatter or any scalarization. */
/* Handle __builtin_fma. */
if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
{
- m_num_reduc[X86_REDUC_FMA] += count;
+ m_num_reduc[X86_REDUC_FMA] += count * scale;
break;
}
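/* A fusable multiply-add: an add/sub whose rhs1 or rhs2 is an
   SSA name defined by a MULT_EXPR is counted as an FMA. */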
&& (def = SSA_NAME_DEF_STMT (rhs1), true)
&& is_gimple_assign (def)
&& gimple_assign_rhs_code (def) == MULT_EXPR)
- m_num_reduc[X86_REDUC_FMA] += count;
+ m_num_reduc[X86_REDUC_FMA] += count * scale;
else if (TREE_CODE (rhs2) == SSA_NAME
&& (def = SSA_NAME_DEF_STMT (rhs2), true)
&& is_gimple_assign (def)
&& gimple_assign_rhs_code (def) == MULT_EXPR)
- m_num_reduc[X86_REDUC_FMA] += count;
+ m_num_reduc[X86_REDUC_FMA] += count * scale;
break;
/* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
? TARGET_AVX10_2
: (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
}
- m_num_reduc[X86_REDUC_DOT_PROD] += count;
+ m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
/* Avoid unrolling and partial sums for
   emulated DOT_PROD_EXPR. */
break;
case SAD_EXPR:
- m_num_reduc[X86_REDUC_SAD] += count;
+ m_num_reduc[X86_REDUC_SAD] += count * scale;
break;
default:
FMA/DOT_PROD_EXPR/SAD_EXPR;
it is used to determine the unroll
factor in the vectorizer. */
- 4, /* Limit how much the autovectorizer
+ 1, /* Limit how much the autovectorizer
may unroll a loop. */
znver1_memcpy,
znver1_memset,
FMA/DOT_PROD_EXPR/SAD_EXPR;
it is used to determine the unroll
factor in the vectorizer. */
- 4, /* Limit how much the autovectorizer
+ 1, /* Limit how much the autovectorizer
may unroll a loop. */
znver2_memcpy,
znver2_memset,
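For context, these per-statement counts are consumed later when the backend
suggests an unroll factor to the vectorizer; the tuning field changed above
(4 -> 1) is the hard cap on that suggestion. The standalone sketch below is
illustrative only, with hypothetical names (sketch_unroll_factor,
reduc_latency), not the actual GCC hook: it caps unrolling by the ratio of
reduction latency to the number of independent reduction chains, which is
the rationale the tuning comments describe.

/* Hypothetical sketch, not GCC code: choose an unroll factor so that
   enough independent reduction chains are in flight to hide the
   latency of FMA/DOT_PROD/SAD, without exceeding the tuning limit.  */
#include <stdio.h>

enum x86_reduc { X86_REDUC_FMA, X86_REDUC_DOT_PROD, X86_REDUC_SAD,
		 X86_REDUC_LAST };

static unsigned
sketch_unroll_factor (const unsigned num_reduc[X86_REDUC_LAST],
		      unsigned reduc_latency,  /* e.g. 4 cycles for FMA.  */
		      unsigned tune_limit)     /* the "4" / "1" field above.  */
{
  unsigned total = 0;
  for (int i = 0; i < X86_REDUC_LAST; i++)
    total += num_reduc[i];

  /* No reductions in the body: nothing to hide, keep the loop small.  */
  if (total == 0)
    return 1;

  /* Unroll until total * uf chains roughly cover the latency.  */
  unsigned uf = (reduc_latency + total - 1) / total;
  if (uf < 1)
    uf = 1;
  return uf < tune_limit ? uf : tune_limit;
}

int
main (void)
{
  unsigned counts[X86_REDUC_LAST] = { 1, 0, 0 };  /* one FMA chain.  */
  printf ("uf = %u\n", sketch_unroll_factor (counts, 4, 4));  /* -> 4  */
  counts[X86_REDUC_FMA] = 4;			  /* four FMA chains.  */
  printf ("uf = %u\n", sketch_unroll_factor (counts, 4, 4));  /* -> 1  */
  return 0;
}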