]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/116352 - SLP scheduling and stmt order
authorRichard Biener <rguenther@suse.de>
Mon, 2 Dec 2024 10:07:46 +0000 (11:07 +0100)
committerRichard Biener <rguenth@gcc.gnu.org>
Mon, 2 Dec 2024 13:04:35 +0000 (14:04 +0100)
The PR uncovers unchecked constraints on the ability to code-generate
with SLP but also latent issues with regard to stmt order checking
since loop (early-break) and BB (for quite some time) vectorization
are no longer constraint to single-BBs.  In particular get_later_stmt
simply compares UIDs of stmts, but that's only reliable when they
are in the same BB.

For the PR in question the problematical case is demoting a SLP node
to external which fails to check we can actually code generate this
in the way we do (using get_later_stmt).  The following thus adds
checking that we demote to external only when all defs are from
the same BB.

We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was
for a wrong-code issue and the vectorization done is a no-op.

PR tree-optimization/116352
PR tree-optimization/117876
* tree-vect-slp.cc (vect_slp_can_convert_to_external): New.
(vect_slp_convert_to_external): Call it.
(vect_build_slp_tree_2): Likewise.

* gcc.dg/vect/pr116352.c: New testcase.
* gcc.dg/vect/bb-slp-49.c: Remove vectorization check.

gcc/testsuite/gcc.dg/vect/bb-slp-49.c
gcc/testsuite/gcc.dg/vect/pr116352.c [new file with mode: 0644]
gcc/tree-vect-slp.cc

index e7101fcff4627bb545549bdfefd33c2ed58aee7b..c0ad5d70a9acde4221e71564c4ba0800f3122a3f 100644 (file)
@@ -23,6 +23,5 @@ main ()
   return 0;
 }
 
-/* See that we vectorize an SLP instance.  */
+/* See that we try to vectorize an SLP instance.  */
 /* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
-/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c b/gcc/testsuite/gcc.dg/vect/pr116352.c
new file mode 100644 (file)
index 0000000..3fe537c
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+static void addPrior(float center_x, float center_y, float width, float height,
+                     bool normalized, float *dst)
+{
+  if (normalized)
+    {
+      dst[0] = (center_x - width * 0.5f);
+      dst[1] = (center_y - height * 0.5f);
+      dst[2] = (center_x + width * 0.5f);
+      dst[3] = (center_y + height * 0.5f);
+    }
+  else
+    {
+      dst[0] = center_x - width * 0.5f;
+      dst[1] = center_y - height * 0.5f;
+      dst[2] = center_x + width * 0.5f - 1.0f;
+      dst[3] = center_y + height * 0.5f - 1.0f;
+    }
+}
+void forward(float *outputPtr, int _offsetsXs, float *_offsetsX,
+            float *_offsetsY, float _stepX, float _stepY,
+            bool _bboxesNormalized, float _boxWidth, float _boxHeight)
+{
+  for (int j = 0; j < _offsetsXs; ++j)
+    {
+      float center_x = (_offsetsX[j]) * _stepX;
+      float center_y = (_offsetsY[j]) * _stepY;
+      addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized,
+              outputPtr);
+      outputPtr += 4;
+    }
+}
index ec986cc3f686dc68ead57fc79c626107a7cd3d2d..1799d5a619b1d20e0393021b22f5a8b9cd51841e 100644 (file)
@@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, gimple_stmt_iterator *,
 static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
                                          slp_tree, stmt_vector_for_cost *);
 static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
+static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &);
 
 static object_allocator<_slp_tree> *slp_tree_pool;
 static slp_tree slp_first_node;
@@ -2887,7 +2888,8 @@ fail:
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              break;
-         if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+         if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))
+             && vect_slp_can_convert_to_external (oprnd_info->def_stmts))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
@@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
                            node, node_instance, cost_vec);
 }
 
+/* Verify if we can externalize a set of internal defs.  */
+
+static bool
+vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts)
+{
+  basic_block bb = NULL;
+  for (stmt_vec_info stmt : stmts)
+    if (!stmt)
+      return false;
+    /* Constant generation uses get_later_stmt which can only handle
+       defs from the same BB.  */
+    else if (!bb)
+      bb = gimple_bb (stmt->stmt);
+    else if (gimple_bb (stmt->stmt) != bb)
+      return false;
+  return true;
+}
+
 /* Try to build NODE from scalars, returning true on success.
    NODE_INSTANCE is the SLP instance that contains NODE.  */
 
@@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
       || !SLP_TREE_SCALAR_STMTS (node).exists ()
       || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node))
       /* Force the mask use to be built from scalars instead.  */
-      || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
+      || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))
+      || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node)))
     return false;
 
-  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-    if (!stmt_info)
-      return false;
-
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "Building vector operands of %p from scalars instead\n",