git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/120687 - avoid disturbing reduction chains in reassoc
author Richard Biener <rguenther@suse.de>
Tue, 29 Jul 2025 08:05:32 +0000 (10:05 +0200)
committer Richard Biener <rguenther@suse.de>
Tue, 29 Jul 2025 10:13:00 +0000 (12:13 +0200)
Reassoc carefully ranks operands to form reduction chains for
vectorization, so we try not to apply any width-related
changes in the early pass.  Unfortunately we are not careful
enough.  The following gates the FMA-related re-ordering and also
the >= 3 ops tail "optimization", which is the culprit here.

This does not yet fix the reported inefficient vectorization when
using signed integer reductions.

PR tree-optimization/120687
* tree-ssa-reassoc.cc (reassociate_bb): Do not disturb
the sorted operand order in the early pass.
* tree-vect-slp.cc (vect_analyze_slp): Dump when a detected
reduction chain fails SLP discovery.

* gcc.dg/vect/pr120687-1.c: New testcase.
* gcc.dg/vect/pr120687-2.c: Likewise.

gcc/testsuite/gcc.dg/vect/pr120687-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr120687-2.c [new file with mode: 0644]
gcc/tree-ssa-reassoc.cc
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-1.c b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
new file mode 100644 (file)
index 0000000..ce9cf63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+unsigned
+frd (unsigned *p, unsigned *lastone)
+{
+  unsigned sum = 0;
+  for (; p <= lastone; p += 16)
+    sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]
+           + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15];
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-2.c b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
new file mode 100644 (file)
index 0000000..dfc6dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-ffast-math" } */
+
+float
+frd (float *p, float *lastone)
+{
+  float sum = 0;
+  for (; p <= lastone; p += 16)
+    sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]
+           + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15];
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
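diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 3c38f3d7a19fc0d3901d822715e2aa0b253bd030..c140f76766eb0072e4468bf06bc23a2ea4fa4ebb 100644 (file)
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc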
@@ -7167,9 +7167,10 @@ reassociate_bb (basic_block bb)
 
                  /* If the target support FMA, rank_ops_for_fma will detect if
                     the chain has fmas and rearrange the ops if so.  */
-                 if (direct_internal_fn_supported_p (IFN_FMA,
-                                                     TREE_TYPE (lhs),
-                                                     opt_type)
+                 if (!reassoc_insert_powi_p
+                     && direct_internal_fn_supported_p (IFN_FMA,
+                                                        TREE_TYPE (lhs),
+                                                        opt_type)
                      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
                    {
                      mult_num = rank_ops_for_fma (&ops);
@@ -7200,7 +7201,8 @@ reassociate_bb (basic_block bb)
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
-                     if (len >= 3
+                     if (!reassoc_insert_powi_p
+                         && len >= 3
                          && (!has_fma
                              /* width > 1 means ranking ops results in better
                                 parallelism.  Check current value to avoid
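diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index cb27d166c553493adff771c0f277c41713449e13..a9c7105f47e67d2db3d48cf9c1b562c5fa75bed3 100644 (file)
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc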
@@ -4950,6 +4950,9 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
                                                 max_tree_size, &limit,
                                                 force_single_lane))
          {
+           if (dump_enabled_p ())
+             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                              "SLP discovery of reduction chain failed\n");
            /* Dissolve reduction chain group.  */
            stmt_vec_info vinfo = first_element;
            stmt_vec_info last = NULL;