rs6000: Modify the way for extra penalized cost

author Kewen Lin <linkw@linux.ibm.com>

Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)

committer Kewen Lin <linkw@linux.ibm.com>

Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)
author Kewen Lin <linkw@linux.ibm.com>
Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)
committer Kewen Lin <linkw@linux.ibm.com>
Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index e4843eb0f1cf57de3d6d64f363f1ead1c463a203..289c1b3df24c992e86e27180213cc766303c27e6 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5272,8 +5272,7 @@ public:
  
  protected:
    void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
-                                   vect_cost_model_location, int,
-                                   unsigned int);
+                                   vect_cost_model_location, unsigned int);
    void density_test (loop_vec_info);
    void adjust_vect_cost_per_loop (loop_vec_info);
  
@@ -5414,7 +5413,6 @@ void
  rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
                                                stmt_vec_info stmt_info,
                                                vect_cost_model_location where,
-                                              int stmt_cost,
                                                unsigned int orig_count)
  {
  
@@ -5456,17 +5454,23 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
         {
           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
           unsigned int nunits = vect_nunits_for_cost (vectype);
-         unsigned int extra_cost = nunits * stmt_cost;
-         /* As function rs6000_builtin_vectorization_cost shows, we have
-            priced much on V16QI/V8HI vector construction as their units,
-            if we penalize them with nunits * stmt_cost, it can result in
-            an unreliable body cost, eg: for V16QI on Power8, stmt_cost
-            is 20 and nunits is 16, the extra cost is 320 which looks
-            much exaggerated.  So let's use one maximum bound for the
-            extra penalized cost for vector construction here.  */
-         const unsigned int MAX_PENALIZED_COST_FOR_CTOR = 12;
-         if (extra_cost > MAX_PENALIZED_COST_FOR_CTOR)
-           extra_cost = MAX_PENALIZED_COST_FOR_CTOR;
+         /* We don't expect strided/elementwise loads for just 1 nunit.  */
+         gcc_assert (nunits > 1);
+         /* The i386 port adopts nunits * stmt_cost as the penalized
+            cost for this kind of penalization.  We used to follow it
+            but found it could result in an unreliable body cost,
+            especially for V16QI/V8HI modes.  To make it better, we
+            choose this new heuristic: for each scalar load, we use 2
+            as the penalized cost for the case with 2 nunits and use 1
+            for the other cases.  It has little supporting theory and
+            is mainly concluded from broad performance evaluations on
+            Power8, Power9 and Power10.  One possibly related point is
+            that vector construction for more units uses more insns,
+            which gives more chances to schedule them well (or even to
+            run them in parallel when enough units are available), so
+            it seems reasonable not to penalize them that much.  */
+         unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
+         unsigned int extra_cost = nunits * adjusted_cost;
           m_extra_ctor_cost += extra_cost;
         }
      }
@@ -5491,8 +5495,7 @@ rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
        retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
        m_costs[where] += retval;
  
-      update_target_cost_per_stmt (kind, stmt_info, where,
-                                  stmt_cost, orig_count);
+      update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
      }
  
    return retval;
author	Kewen Lin <linkw@linux.ibm.com>
	Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)
committer	Kewen Lin <linkw@linux.ibm.com>
	Tue, 30 Nov 2021 03:22:27 +0000 (21:22 -0600)