]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/122406 - incomplete handling of reduction chain with conversion
authorRichard Biener <rguenther@suse.de>
Fri, 24 Oct 2025 07:07:56 +0000 (09:07 +0200)
committerRichard Biener <rguenther@suse.de>
Fri, 24 Oct 2025 09:50:54 +0000 (11:50 +0200)
The following fixes the mixup between reduction operation and conversion
wrapping a reduction chain.  This also exposes a missed optimization
but I'm going to fix that in a followup.

PR tree-optimization/122406
* tree-vect-slp.cc (vect_analyze_slp_reduc_chain): Create
the SLP nodes for the conversions around the reduction
operation if required.

* gcc.dg/vect/vect-pr122406-1.c: New testcase.
* gcc.dg/vect/vect-pr122406-2.c: Likewise.

gcc/testsuite/gcc.dg/vect/vect-pr122406-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-pr122406-2.c [new file with mode: 0644]
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122406-1.c b/gcc/testsuite/gcc.dg/vect/vect-pr122406-1.c
new file mode 100644 (file)
index 0000000..a67a01c
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fwrapv" } */
+
+int f(long l, short *sp) {
+  unsigned short us;
+  for (; l; l -= 4, sp += 4)
+    us += sp[1] + sp[3];
+  return us;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122406-2.c b/gcc/testsuite/gcc.dg/vect/vect-pr122406-2.c
new file mode 100644 (file)
index 0000000..0bce77d
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+
+int f(long l, short *sp) {
+  unsigned short us;
+  for (; l; l -= 4, sp += 4)
+    us += sp[1] + sp[3];
+  return us;
+}
index 3c760b495af2357c00a9ccec3b3e503da95ae91c..31d84857d49347a77d9df4cc0c6a2bc7f90d35cf 100644 (file)
@@ -4233,6 +4233,9 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
   if (fail)
     return false;
 
+  /* Remember a stmt with the actual reduction operation.  */
+  stmt_vec_info reduc_scalar_stmt = scalar_stmts[0];
+
   /* When the SSA def chain through reduc-idx does not form a natural
      reduction chain try to linearize an associative operation manually.  */
   if (scalar_stmts.length () == 1
@@ -4240,7 +4243,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
       && associative_tree_code ((tree_code)code)
       /* We may not associate if a fold-left reduction is required.  */
       && !needs_fold_left_reduction_p (TREE_TYPE (gimple_get_lhs
-                                                   (scalar_stmt->stmt)),
+                                                   (reduc_scalar_stmt->stmt)),
                                       code))
     {
       auto_vec<chain_op_t> chain;
@@ -4361,21 +4364,45 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
       VECT_REDUC_INFO_FN (reduc_info) = IFN_LAST;
       reduc_info->is_reduc_chain = true;
 
-      /* Build the node for the PHI and possibly the conversion(s?).  */
+      /* Build the node for the PHI and possibly the conversions.  */
       slp_tree phis = vect_create_new_slp_node (2, ERROR_MARK);
       SLP_TREE_REPRESENTATIVE (phis) = next_stmt;
       phis->cycle_info.id = cycle_id;
       SLP_TREE_LANES (phis) = group_size;
-      SLP_TREE_VECTYPE (phis) = SLP_TREE_VECTYPE (node);
+      if (reduc_scalar_stmt == scalar_stmt)
+       SLP_TREE_VECTYPE (phis) = SLP_TREE_VECTYPE (node);
+      else
+       SLP_TREE_VECTYPE (phis)
+         = signed_or_unsigned_type_for (TYPE_UNSIGNED
+                                          (TREE_TYPE (gimple_get_lhs
+                                                        (scalar_stmt->stmt))),
+                                        SLP_TREE_VECTYPE (node));
       /* ???  vect_cse_slp_nodes cannot cope with cycles without any
         SLP_TREE_SCALAR_STMTS.  */
       SLP_TREE_SCALAR_STMTS (phis).create (group_size);
       for (unsigned i = 0; i < group_size; ++i)
        SLP_TREE_SCALAR_STMTS (phis).quick_push (next_stmt);
 
+      slp_tree op_input = phis;
+      if (reduc_scalar_stmt != scalar_stmt)
+       {
+         slp_tree conv = vect_create_new_slp_node (1, ERROR_MARK);
+         SLP_TREE_REPRESENTATIVE (conv)
+           = vinfo->lookup_def (gimple_arg (reduc_scalar_stmt->stmt,
+                                            STMT_VINFO_REDUC_IDX
+                                              (reduc_scalar_stmt)));
+         SLP_TREE_CHILDREN (conv).quick_push (phis);
+         conv->cycle_info.id = cycle_id;
+         SLP_TREE_REDUC_IDX (conv) = 0;
+         SLP_TREE_LANES (conv) = group_size;
+         SLP_TREE_VECTYPE (conv) = SLP_TREE_VECTYPE (node);
+         SLP_TREE_SCALAR_STMTS (conv) = vNULL;
+         op_input = conv;
+       }
+
       slp_tree reduc = vect_create_new_slp_node (2, ERROR_MARK);
-      SLP_TREE_REPRESENTATIVE (reduc) = scalar_stmt;
-      SLP_TREE_CHILDREN (reduc).quick_push (phis);
+      SLP_TREE_REPRESENTATIVE (reduc) = reduc_scalar_stmt;
+      SLP_TREE_CHILDREN (reduc).quick_push (op_input);
       SLP_TREE_CHILDREN (reduc).quick_push (node);
       reduc->cycle_info.id = cycle_id;
       SLP_TREE_REDUC_IDX (reduc) = 0;
@@ -4383,10 +4410,27 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
       SLP_TREE_VECTYPE (reduc) = SLP_TREE_VECTYPE (node);
       /* ???  For the reduction epilogue we need a live lane.  */
       SLP_TREE_SCALAR_STMTS (reduc).create (group_size);
-      SLP_TREE_SCALAR_STMTS (reduc).quick_push (scalar_stmt);
+      SLP_TREE_SCALAR_STMTS (reduc).quick_push (reduc_scalar_stmt);
       for (unsigned i = 1; i < group_size; ++i)
        SLP_TREE_SCALAR_STMTS (reduc).quick_push (NULL);
 
+      if (reduc_scalar_stmt != scalar_stmt)
+       {
+         slp_tree conv = vect_create_new_slp_node (1, ERROR_MARK);
+         SLP_TREE_REPRESENTATIVE (conv) = scalar_stmt;
+         SLP_TREE_CHILDREN (conv).quick_push (reduc);
+         conv->cycle_info.id = cycle_id;
+         SLP_TREE_REDUC_IDX (conv) = 0;
+         SLP_TREE_LANES (conv) = group_size;
+         SLP_TREE_VECTYPE (conv) = SLP_TREE_VECTYPE (phis);
+         /* ???  For the reduction epilogue we need a live lane.  */
+         SLP_TREE_SCALAR_STMTS (conv).create (group_size);
+         SLP_TREE_SCALAR_STMTS (conv).quick_push (scalar_stmt);
+         for (unsigned i = 1; i < group_size; ++i)
+           SLP_TREE_SCALAR_STMTS (conv).quick_push (NULL);
+         reduc = conv;
+       }
+
       edge le = loop_latch_edge (LOOP_VINFO_LOOP (vinfo));
       SLP_TREE_CHILDREN (phis).quick_push (NULL);
       SLP_TREE_CHILDREN (phis).quick_push (NULL);