tree-optimization/115841 - reduction epilogue placement issue

author Richard Biener <rguenther@suse.de>

Tue, 16 Jul 2024 09:53:17 +0000 (11:53 +0200)

committer Richard Biener <rguenth@gcc.gnu.org>

Tue, 16 Jul 2024 14:05:11 +0000 (16:05 +0200)
author Richard Biener <rguenther@suse.de>
Tue, 16 Jul 2024 09:53:17 +0000 (11:53 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 16 Jul 2024 14:05:11 +0000 (16:05 +0200)
diff --git a/gcc/testsuite/gcc.dg/vect/pr115841.c b/gcc/testsuite/gcc.dg/vect/pr115841.c

new file mode 100644 (file)

index 0000000..aa5c660
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115841.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+
+/* To trigger the bug, costing needs to determine that aligning the A170
+   accesses with a prologue is profitable, and there must be a vectorized
+   epilogue with a smaller vector size that re-uses the vector accumulator
+   from the vectorized main loop.  The main loop is statically known to
+   execute, but the epilogue loop is not.  */
+
+static unsigned char xl[192];  /* 192-byte buffer; foo reads it through a float array type.  */
+unsigned char A170[192*3];  /* Three times the size of xl; foo reads it at indices j, j+48 and j+96.  */
+
+void jerate (unsigned char *, unsigned char *);  /* Declared only; no definition appears in this test.  */
+float foo (unsigned n)  /* n is used only as the bound of the float[n] type in the casts below.  */
+{
+  jerate (xl, A170);  /* Hands both buffers to the externally defined function before they are read.  */
+
+  unsigned i = 32;  /* Down-counter: the do-while below executes exactly 32 times.  */
+  int kr = 1;  /* 1-based position, incremented once per iteration.  */
+  float sfn11s = 0.f;  /* First sum accumulated across the loop.  */
+  float sfn12s = 0.f;  /* Second sum accumulated across the loop.  */
+  do
+    {
+      int krm1 = kr - 1;  /* 0-based position for this iteration.  */
+      long j = krm1;  /* Widened copy used as the array index.  */
+      float a = (*(float(*)[n])A170)[j];  /* Element j of A170 viewed as float[n].  */
+      float b = (*(float(*)[n])xl)[j];  /* Element j of xl viewed as float[n].  */
+      float c = a * b;
+      float d = c * 6.93149983882904052734375e-1f;  /* Scale the product by a float constant.  */
+      float e = (*(float(*)[n])A170)[j+48];  /* Same view of A170, 48 elements further on.  */
+      float f = (*(float(*)[n])A170)[j+96];  /* Same view of A170, 96 elements further on.  */
+      float g = d * e;
+      sfn11s = sfn11s + g;  /* Update of the first running sum.  */
+      float h = f * d;
+      sfn12s = sfn12s + h;  /* Update of the second running sum.  */
+      kr++;
+    }
+  while (--i != 0);  /* Stops once the counter started at 32 reaches zero.  */
+  float tem = sfn11s + sfn12s;  /* Combine the two sums into the result.  */
+  return tem;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc

index a64b5082bd18c09ab99685fce953adaf77f8d3ca..b8124a32128045d2ae8d041f5dc879c4d7a4f8e4 100644 (file)
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9026,14 +9026,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
           /* And the reduction could be carried out using a different sign.  */
           if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
             def = gimple_convert (&stmts, vectype_out, def);
-         if (loop_vinfo->main_loop_edge)
+         edge e;
+         if ((e = loop_vinfo->main_loop_edge)
+             || (e = loop_vinfo->skip_this_loop_edge))
             {
               /* While we'd like to insert on the edge this will split
                  blocks and disturb bookkeeping, we also will eventually
                  need this on the skip edge.  Rely on sinking to
                  fixup optimal placement and insert in the pred.  */
-             gimple_stmt_iterator gsi
-               = gsi_last_bb (loop_vinfo->main_loop_edge->src);
+             gimple_stmt_iterator gsi = gsi_last_bb (e->src);
               /* Insert before a cond that eventually skips the
                  epilogue.  */
               if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
author	Richard Biener <rguenther@suse.de>
	Tue, 16 Jul 2024 09:53:17 +0000 (11:53 +0200)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Tue, 16 Jul 2024 14:05:11 +0000 (16:05 +0200)
gcc/testsuite/gcc.dg/vect/pr115841.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-loop.cc		patch \| blob \| blame \| history