git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/115438 - SLP reduction vect vs. bwaves
author Richard Biener <rguenther@suse.de>
Fri, 29 Nov 2024 11:14:24 +0000 (12:14 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 29 Nov 2024 12:43:25 +0000 (13:43 +0100)
503.bwaves_r shows a case where the non-SLP optimization of performing
the reduction adjustment with the initial value as part of the epilogue,
rather than including it as part of the initial vector value, matters:
it allows breaking a critical dependence path.  The following restores
this ability for single-lane SLP.

On Zen2 this turns a 2.5% regression from GCC 14 into a 2.5%
improvement.

PR tree-optimization/115438
* tree-vect-loop.cc (vect_transform_cycle_phi): For SLP also
try to do the reduction adjustment by the initial value
in the epilogue.

gcc/tree-vect-loop.cc

index 8c9be48ef0f1d19406103559d9c56a08c1209711..5a24fb8bf4c8139d8dfa2f9959003a797382b3e3 100644 (file)
@@ -9193,6 +9193,20 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
              tree neutral_op
                = neutral_op_for_reduction (TREE_TYPE (vectype_out),
                                            code, initial_value);
+             /* Try to simplify the vector initialization by applying an
+                adjustment after the reduction has been performed.  This
+                can also break a critical path but on the other hand
+                requires to keep the initial value live across the loop.  */
+             if (neutral_op
+                 && initial_values.length () == 1
+                 && !reduc_info->reused_accumulator
+                 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+                 && !operand_equal_p (neutral_op, initial_values[0]))
+               {
+                 STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
+                   = initial_values[0];
+                 initial_values[0] = neutral_op;
+               }
              get_initial_defs_for_reduction (loop_vinfo, reduc_info,
                                              &vec_initial_defs, vec_num,
                                              stmts.length (), neutral_op);