]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/120687 - reduction chain with UB on signed overflow
authorRichard Biener <rguenther@suse.de>
Mon, 20 Oct 2025 12:44:27 +0000 (14:44 +0200)
committerRichard Biener <rguenth@gcc.gnu.org>
Tue, 21 Oct 2025 11:19:04 +0000 (13:19 +0200)
The following adds the ability to discover a reduction chain on a
series of statements that invoke undefined behavior on integer overflow.
This inhibits the reassoc pass from associating stmts in the way
naturally leading to a reduction chain.  The common mistake on the
source side is to rely on the += operator to sum multiple inputs.

After the refactoring of how we handle reduction chains we can
easily use vect_slp_linearize_chain to do this our selves and
rely on the vectorizer punning operations to unsigned given reduction
vectorization always associates.

PR tree-optimization/120687
* tree-vect-slp.cc (vect_analyze_slp_reduc_chain): When
there's no natural reduction chain see if vect_slp_linearize_chain
can recover one and built the SLP instance manually in that
case.
(vect_schedule_slp): Deal with NULL lanes when looking for
stores to remove.
* tree-vect-loop.cc (vect_transform_cycle_phi): Dump when we
are successfully transforming a reduction chain.

* gcc.dg/vect/vect-reduc-chain-4.c: New testcase.

gcc/testsuite/gcc.dg/vect/vect-reduc-chain-4.c [new file with mode: 0644]
gcc/tree-vect-loop.cc
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-chain-4.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-chain-4.c
new file mode 100644 (file)
index 0000000..d79676d
--- /dev/null
@@ -0,0 +1,31 @@
+#include "tree-vect.h"
+
+int q[32];
+
+int __attribute__((noipa))
+foo ()
+{
+  int res = 0;
+  for (int i = 0; i < 8; ++i)
+    res += q[4*i] + q[4*i+1] + q[4*i+2] + q[4*i+3];
+  return res;
+}
+
+int main()
+{
+  check_vect ();
+
+  int sum = 0;
+#pragma GCC novector
+  for (int i = 0; i < 32; ++i)
+    {
+      q[i] = i;
+      sum += i;
+    }
+
+  if (foo () != sum)
+    abort ();
+}
+
+/* { dg-final { scan-tree-dump "vectorizing a reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
index 455205069f6034e8d0d6cb9c5dd4cd5ee15ad9b9..be684a529db24b5f3cbee93a8d87535077efe743 100644 (file)
@@ -8220,6 +8220,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
     /* Leave the scalar phi in place.  */
     return true;
 
+  if (reduc_info && reduc_info->is_reduc_chain && dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "vectorizing a reduction chain\n");
+
   vec_num = vect_get_num_copies (loop_vinfo, slp_node);
 
   /* Check whether we should use a single PHI node and accumulate
index 13a2995592b70e0d72f7c1dcbfd169b8d91e5f2e..53fe643be2c6851473e2ed36c0f5701c0b0952bc 100644 (file)
@@ -4230,9 +4230,142 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
       first = false;
     }
   while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt)));
-  if (fail || scalar_stmts.length () <= 1)
+  if (fail)
     return false;
 
+  if (scalar_stmts.length () == 1
+      && code.is_tree_code ()
+      && associative_tree_code ((tree_code)code))
+    {
+      auto_vec<chain_op_t> chain;
+      auto_vec<std::pair<tree_code, gimple *> > worklist;
+      gimple *op_stmt = NULL, *other_op_stmt = NULL;
+      vect_slp_linearize_chain (vinfo, worklist, chain, (tree_code)code,
+                               scalar_stmts[0]->stmt, op_stmt, other_op_stmt,
+                               NULL);
+      if (chain.length () < 3)
+       {
+         scalar_stmts.release ();
+         return false;
+       }
+
+      scalar_stmts.truncate (0);
+      stmt_vec_info tail = NULL;
+      for (auto el : chain)
+       {
+         if (el.dt == vect_external_def
+             || el.dt == vect_constant_def
+             || el.code != (tree_code) code)
+           {
+             scalar_stmts.release ();
+             return false;
+           }
+         stmt_vec_info stmt = vinfo->lookup_def (el.op);
+         if (STMT_VINFO_REDUC_IDX (stmt) != -1
+             || STMT_VINFO_REDUC_DEF (stmt))
+           {
+             gcc_assert (tail == NULL);
+             tail = stmt;
+             continue;
+           }
+         scalar_stmts.safe_push (stmt);
+       }
+      gcc_assert (tail);
+
+      if (dump_enabled_p ())
+       {
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Starting SLP discovery of reduction chain for\n");
+         for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "  %G", scalar_stmts[i]->stmt);
+       }
+
+      unsigned int group_size = scalar_stmts.length ();
+      bool *matches = XALLOCAVEC (bool, group_size);
+      poly_uint64 max_nunits = 1;
+      unsigned tree_size = 0;
+      slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+                                          &max_nunits, matches, limit,
+                                          &tree_size, bst_map);
+      if (!node)
+       {
+         scalar_stmts.release ();
+         return false;
+       }
+
+      unsigned cycle_id = vinfo->reduc_infos.length ();
+      vect_reduc_info reduc_info = new vect_reduc_info_s ();
+      vinfo->reduc_infos.safe_push (reduc_info);
+      VECT_REDUC_INFO_DEF_TYPE (reduc_info) = STMT_VINFO_DEF_TYPE (next_stmt);
+      VECT_REDUC_INFO_TYPE (reduc_info) = STMT_VINFO_REDUC_TYPE (next_stmt);
+      VECT_REDUC_INFO_CODE (reduc_info) = STMT_VINFO_REDUC_CODE (next_stmt);
+      VECT_REDUC_INFO_FN (reduc_info) = IFN_LAST;
+      reduc_info->is_reduc_chain = true;
+
+      /* Build the node for the PHI and possibly the conversion(s?).  */
+      slp_tree phis = vect_create_new_slp_node (2, ERROR_MARK);
+      SLP_TREE_REPRESENTATIVE (phis) = next_stmt;
+      phis->cycle_info.id = cycle_id;
+      SLP_TREE_LANES (phis) = group_size;
+      SLP_TREE_VECTYPE (phis) = SLP_TREE_VECTYPE (node);
+      /* ???  vect_cse_slp_nodes cannot cope with cycles without any
+        SLP_TREE_SCALAR_STMTS.  */
+      SLP_TREE_SCALAR_STMTS (phis).create (group_size);
+      for (unsigned i = 0; i < group_size; ++i)
+       SLP_TREE_SCALAR_STMTS (phis).quick_push (next_stmt);
+
+      slp_tree reduc = vect_create_new_slp_node (2, ERROR_MARK);
+      SLP_TREE_REPRESENTATIVE (reduc) = scalar_stmt;
+      SLP_TREE_CHILDREN (reduc).quick_push (phis);
+      SLP_TREE_CHILDREN (reduc).quick_push (node);
+      reduc->cycle_info.id = cycle_id;
+      SLP_TREE_REDUC_IDX (reduc) = 0;
+      SLP_TREE_LANES (reduc) = group_size;
+      SLP_TREE_VECTYPE (reduc) = SLP_TREE_VECTYPE (node);
+      /* ???  For the reduction epilogue we need a live lane.  */
+      SLP_TREE_SCALAR_STMTS (reduc).create (group_size);
+      SLP_TREE_SCALAR_STMTS (reduc).quick_push (scalar_stmt);
+      for (unsigned i = 1; i < group_size; ++i)
+       SLP_TREE_SCALAR_STMTS (reduc).quick_push (NULL);
+
+      edge le = loop_latch_edge (LOOP_VINFO_LOOP (vinfo));
+      SLP_TREE_CHILDREN (phis).quick_push (NULL);
+      SLP_TREE_CHILDREN (phis).quick_push (NULL);
+      SLP_TREE_CHILDREN (phis)[le->dest_idx] = reduc;
+      SLP_TREE_REF_COUNT (reduc)++;
+
+      /* Create a new SLP instance.  */
+      slp_instance new_instance = XNEW (class _slp_instance);
+      SLP_INSTANCE_TREE (new_instance) = reduc;
+      SLP_INSTANCE_LOADS (new_instance) = vNULL;
+      SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL;
+      SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+      SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
+      new_instance->reduc_phis = NULL;
+      new_instance->cost_vec = vNULL;
+      new_instance->subgraph_entries = vNULL;
+
+      vinfo->slp_instances.safe_push (new_instance);
+
+      if (dump_enabled_p ())
+       {
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Final SLP tree for instance %p:\n",
+                          (void *) new_instance);
+         vect_print_slp_graph (MSG_NOTE, vect_location,
+                               SLP_INSTANCE_TREE (new_instance));
+       }
+
+      return true;
+    }
+
+  if (scalar_stmts.length () <= 1)
+    {
+      scalar_stmts.release ();
+      return false;
+    }
+
   scalar_stmts.reverse ();
   stmt_vec_info reduc_phi_info = next_stmt;
 
@@ -12046,7 +12179,8 @@ vect_schedule_slp (vec_info *vinfo, const vec<slp_instance> &slp_instances)
       /* Remove vectorized stores original scalar stmts.  */
       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
         {
-         if (!STMT_VINFO_DATA_REF (store_info)
+         if (!store_info
+             || !STMT_VINFO_DATA_REF (store_info)
              || !DR_IS_WRITE (STMT_VINFO_DATA_REF (store_info)))
            break;