tree-optimization/100089 - avoid leaving scalar if-converted code around

author Richard Biener <rguenther@suse.de>

Tue, 24 Aug 2021 10:25:25 +0000 (12:25 +0200)

committer Richard Biener <rguenther@suse.de>

Tue, 24 Aug 2021 12:23:00 +0000 (14:23 +0200)
author Richard Biener <rguenther@suse.de>
Tue, 24 Aug 2021 10:25:25 +0000 (12:25 +0200)
committer Richard Biener <rguenther@suse.de>
Tue, 24 Aug 2021 12:23:00 +0000 (14:23 +0200)
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index d2f6a16f22063f4d8f3ecb00ce0c9f04d5a6b9b0..edc11c62793a3cdf2d96a846c5bd66aaf6431f6d 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -5233,7 +5233,8 @@ li_cost_vec_cmp (const void *a_, const void *b_)
  
  static bool
  vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
-                                   vec<slp_instance> slp_instances)
+                                   vec<slp_instance> slp_instances,
+                                   loop_p orig_loop)
  {
    slp_instance instance;
    int i;
@@ -5270,6 +5271,30 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
        vector_costs.safe_splice (instance->cost_vec);
        instance->cost_vec.release ();
      }
+  /* When we're vectorizing an if-converted loop body with the
+     very-cheap cost model make sure we vectorized all if-converted
+     code.  */
+  bool force_not_profitable = false;
+  if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
+    {
+      gcc_assert (bb_vinfo->bbs.length () == 1);
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
+          !gsi_end_p (gsi); gsi_next (&gsi))
+       {
+         /* The costing above left us with DCEable vectorized scalar
+            stmts having the visited flag set.  */
+         if (gimple_visited_p (gsi_stmt (gsi)))
+           continue;
+
+         if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
+           if (gimple_assign_rhs_code (ass) == COND_EXPR)
+             {
+               force_not_profitable = true;
+               break;
+             }
+       }
+    }
+
    /* Unset visited flag.  */
    stmt_info_for_cost *cost;
    FOR_EACH_VEC_ELT (scalar_costs, i, cost)
@@ -5394,9 +5419,14 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
        return false;
      }
  
+  if (dump_enabled_p () && force_not_profitable)
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "not profitable because of unprofitable if-converted "
+                    "scalar code\n");
+
    scalar_costs.release ();
    vector_costs.release ();
-  return true;
+  return !force_not_profitable;
  }
  
  /* qsort comparator for lane defs.  */
@@ -5810,7 +5840,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
  
  static bool
  vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
-                vec<int> *dataref_groups, unsigned int n_stmts)
+                vec<int> *dataref_groups, unsigned int n_stmts,
+                loop_p orig_loop)
  {
    bb_vec_info bb_vinfo;
    auto_vector_modes vector_modes;
@@ -5859,7 +5890,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
               vect_location = instance->location ();
               if (!unlimited_cost_model (NULL)
                   && !vect_bb_vectorization_profitable_p
-                       (bb_vinfo, instance->subgraph_entries))
+                       (bb_vinfo,
+                        orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
+                        : instance->subgraph_entries, orig_loop))
                 {
                   if (dump_enabled_p ())
                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5877,7 +5910,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
                                  "using SLP\n");
               vectorized = true;
  
-             vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
+             vect_schedule_slp (bb_vinfo,
+                                orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
+                                : instance->subgraph_entries);
  
               unsigned HOST_WIDE_INT bytes;
               if (dump_enabled_p ())
@@ -5892,6 +5927,11 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
                                      "basic block part vectorized using "
                                      "variable length vectors\n");
                 }
+
+             /* When we're called from loop vectorization we're considering
+                all subgraphs at once.  */
+             if (orig_loop)
+               break;
             }
         }
        else
@@ -5959,7 +5999,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
     true if anything in the basic-block was vectorized.  */
  
  static bool
-vect_slp_bbs (const vec<basic_block> &bbs)
+vect_slp_bbs (const vec<basic_block> &bbs, loop_p orig_loop)
  {
    vec<data_reference_p> datarefs = vNULL;
    auto_vec<int> dataref_groups;
@@ -5989,18 +6029,20 @@ vect_slp_bbs (const vec<basic_block> &bbs)
        ++current_group;
      }
  
-  return vect_slp_region (bbs, datarefs, &dataref_groups, insns);
+  return vect_slp_region (bbs, datarefs, &dataref_groups, insns, orig_loop);
  }
  
-/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
-   true if anything in the basic-block was vectorized.  */
+/* Special entry for the BB vectorizer.  Analyze and transform a single
+   if-converted BB, with ORIG_LOOP's body being the non-if-converted
+   representation.  Returns true if anything in the basic-block was
+   vectorized.  */
  
  bool
-vect_slp_bb (basic_block bb)
+vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop)
  {
    auto_vec<basic_block> bbs;
    bbs.safe_push (bb);
-  return vect_slp_bbs (bbs);
+  return vect_slp_bbs (bbs, orig_loop);
  }
  
  /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
@@ -6051,7 +6093,7 @@ vect_slp_function (function *fun)
  
        if (split && !bbs.is_empty ())
         {
-         r |= vect_slp_bbs (bbs);
+         r |= vect_slp_bbs (bbs, NULL);
           bbs.truncate (0);
           bbs.quick_push (bb);
         }
@@ -6069,13 +6111,13 @@ vect_slp_function (function *fun)
               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                "splitting region at control altering "
                                "definition %G", last);
-           r |= vect_slp_bbs (bbs);
+           r |= vect_slp_bbs (bbs, NULL);
             bbs.truncate (0);
           }
      }
  
    if (!bbs.is_empty ())
-    r |= vect_slp_bbs (bbs);
+    r |= vect_slp_bbs (bbs, NULL);
  
    free (rpo);
  
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c

index 813f4683d1efff6f654062e0c0bde74805bc1f85..3aa3e2a678328baccc4869fe2c6546e700b92255 100644 (file)
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1033,10 +1033,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
          only non-if-converted parts took part in BB vectorization.  */
        if (flag_tree_slp_vectorize != 0
           && loop_vectorized_call
-         && ! loop->inner
-         /* This would purely be a workaround and should be removed
-            once PR100089 is fixed.  */
-         && flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
+         && ! loop->inner)
         {
           basic_block bb = loop->header;
           bool require_loop_vectorize = false;
@@ -1062,12 +1059,17 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
               gimple_set_uid (stmt, -1);
               gimple_set_visited (stmt, false);
             }
-         if (!require_loop_vectorize && vect_slp_bb (bb))
+         if (!require_loop_vectorize)
             {
-             fold_loop_internal_call (loop_vectorized_call,
-                                      boolean_true_node);
-             loop_vectorized_call = NULL;
-             ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+             tree arg = gimple_call_arg (loop_vectorized_call, 1);
+             class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
+             if (vect_slp_if_converted_bb (bb, scalar_loop))
+               {
+                 fold_loop_internal_call (loop_vectorized_call,
+                                          boolean_true_node);
+                 loop_vectorized_call = NULL;
+                 ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+               }
             }
         }
        /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h

index 9c2c29d61fae5e651a112b103482131e3d646fb6..72e018e8eacec672e709cbc6f30a0c014026e5ee 100644 (file)
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2087,7 +2087,7 @@ extern void vect_gather_slp_loads (vec_info *);
  extern void vect_get_slp_defs (slp_tree, vec<tree> *);
  extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
                                unsigned n = -1U);
-extern bool vect_slp_bb (basic_block);
+extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
  extern bool vect_slp_function (function *);
  extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
  extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
author	Richard Biener <rguenther@suse.de>
	Tue, 24 Aug 2021 10:25:25 +0000 (12:25 +0200)
committer	Richard Biener <rguenther@suse.de>
	Tue, 24 Aug 2021 12:23:00 +0000 (14:23 +0200)
gcc/tree-vect-slp.c		patch \| blob \| blame \| history
gcc/tree-vectorizer.c		patch \| blob \| blame \| history
gcc/tree-vectorizer.h		patch \| blob \| blame \| history