tree-optimization/115825 - improve unroll estimates for volatile accesses

author Richard Biener <rguenther@suse.de>
Wed, 10 Jul 2024 10:45:02 +0000 (12:45 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Mon, 25 Nov 2024 07:17:54 +0000 (08:17 +0100)
diff --git a/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c b/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c

index b7a0d1aa92bf65d4e964200ba9ca756341a5bc19..8831a1fb187c9e5049df52b7b7aca18ce64b22ce 100644 (file)
--- a/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c
+++ b/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c
@@ -14,8 +14,9 @@ struct snic {
  void snic_log_q_error(struct snic *snic)
  {
      unsigned int i;
+#pragma GCC unroll 1
      for (i = 0; i < snic->wq_count; i++)
-        ioread32(&snic->wq[i]->error_status);
+      ioread32(&snic->wq[i]->error_status);
  }
  
  /* { dg-final { scan-tree-dump "__builtin___ubsan_handle_builtin_unreachable" "optimized" } } */
diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C b/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C

index f4876d8a269d2f825466759970c307edc5cd50f2..5fc552930747b8bd74d4ee6db9624dc8537eb92a 100644 (file)
--- a/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C
+++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C
@@ -53,8 +53,8 @@ void warn_derived_ctor_access_new_alloc ()
  
  void warn_derived_ctor_access_new_array_decl ()
  {
-  char b[sizeof (D1) * 2];    // { dg-message "at offset \\d+ into object 'b' of size 80" "LP64 note" { target lp64 } }
-                              // { dg-message "at offset \\d+ into object 'b' of size 40" "LP64 note" { target ilp32 } .-1 }
+  char b[sizeof (D1) * 2];    // { dg-message "at offset \\d+ into object 'b' of size 80" "LP64 note" { target { lp64 } xfail { lp64 } } }
+                              // { dg-message "at offset \\d+ into object 'b' of size 40" "LP64 note" { target { ilp32 } xfail { ilp32 } } .-1 }
    char *p = b;
    ++p;
    D1 *q = new (p) D1[2];
@@ -63,7 +63,7 @@ void warn_derived_ctor_access_new_array_decl ()
  
  void warn_derived_ctor_access_new_array_alloc ()
  {
-  char *p = new char[sizeof (D1) * 2];            // { dg-message "at offset \\d+ into object of size \\d+ allocated by '\[^\n\r]*operator new\[^\n\r]*" "note" }
+  char *p = new char[sizeof (D1) * 2];            // { dg-message "at offset \\d+ into object of size \\d+ allocated by '\[^\n\r]*operator new\[^\n\r]*" "note" { xfail *-*-* } }
    ++p;
    D1 *q = new (p) D1[2];
    sink (q);
diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c b/gcc/testsuite/gcc.dg/pr94600-1.c

index 149e4f35dbee9dd06308347e080121a8be883276..d5fb4d169c4c1b3c84e64eaaec57af7942476b56 100644 (file)
--- a/gcc/testsuite/gcc.dg/pr94600-1.c
+++ b/gcc/testsuite/gcc.dg/pr94600-1.c
@@ -31,6 +31,7 @@ foo(void)
  {
    __SIZE_TYPE__ i;
    __SIZE_TYPE__ base = 0x000a0000;
+#pragma GCC unroll 5
    for (i = 0; i < (sizeof (a0) / sizeof ((a0)[0])); i++) {
      *(volatile t0 *) (base + 44 + i * 4) = a0[i];
    }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c

new file mode 100644 (file)

index 0000000..282db99
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-optimized" } */
+
+char volatile v;
+void for16 (void)
+{
+  for (char i = 16; i > 0; i -= 2)
+    v = i;
+}
+
+/* { dg-final { scan-tree-dump-times " ={v} " 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c

index b1d1c7d3d852c00ca269ab9fe49f717bf42a562c..d1122e068c4595518cd2b837c85ace5bd0d41e79 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c
@@ -13,5 +13,6 @@ test(int c)
         return;
      }
  }
+
  /* We are not able to get rid of the final conditional because the loop has two exits.  */
-/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunroll"} } */
+/* { dg-final { scan-tree-dump "Not unrolling loop 1: contains call and code would grow" "cunroll"} } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc

index 0d496d738304aa73f2f64937aaf8f77006cc6dc3..9a94d82fc4e0757cfdc09fcd03528480de9d9bfd 100644 (file)
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -139,10 +139,15 @@ struct loop_size
       variable where induction variable starts at known constant.)  */
    int eliminated_by_peeling;
  
+  /* Number of instructions that cannot be further optimized in the
+     peeled loop, for example volatile accesses.  */
+  int not_eliminatable_after_peeling;
+
    /* Same statistics for last iteration of loop: it is smaller because
       instructions after exit are not executed.  */
    int last_iteration;
    int last_iteration_eliminated_by_peeling;
+  int last_iteration_not_eliminatable_after_peeling;
  
    /* If some IV computation will become constant.  */
    bool constant_iv;
@@ -267,8 +272,10 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
  
    size->overall = 0;
    size->eliminated_by_peeling = 0;
+  size->not_eliminatable_after_peeling = 0;
    size->last_iteration = 0;
    size->last_iteration_eliminated_by_peeling = 0;
+  size->last_iteration_not_eliminatable_after_peeling = 0;
    size->num_pure_calls_on_hot_path = 0;
    size->num_non_pure_calls_on_hot_path = 0;
    size->non_call_stmts_on_hot_path = 0;
@@ -292,6 +299,7 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
         {
           gimple *stmt = gsi_stmt (gsi);
           int num = estimate_num_insns (stmt, &eni_size_weights);
+         bool not_eliminatable_after_peeling = false;
           bool likely_eliminated = false;
           bool likely_eliminated_last = false;
           bool likely_eliminated_peeled = false;
@@ -304,7 +312,9 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
  
           /* Look for reasons why we might optimize this stmt away. */
  
-         if (!gimple_has_side_effects (stmt))
+         if (gimple_has_side_effects (stmt))
+           not_eliminatable_after_peeling = true;
+         else
             {
               /* Exit conditional.  */
               if (exit && body[i] == exit->src
@@ -377,11 +387,15 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
           size->overall += num;
           if (likely_eliminated || likely_eliminated_peeled)
             size->eliminated_by_peeling += num;
+         if (not_eliminatable_after_peeling)
+           size->not_eliminatable_after_peeling += num;
           if (!after_exit)
             {
               size->last_iteration += num;
               if (likely_eliminated || likely_eliminated_last)
                 size->last_iteration_eliminated_by_peeling += num;
+             if (not_eliminatable_after_peeling)
+               size->last_iteration_not_eliminatable_after_peeling += num;
             }
           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
               - size->last_iteration_eliminated_by_peeling) > upper_bound)
@@ -437,18 +451,24 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
     It is (NUNROLL + 1) * size of loop body with taking into account
     the fact that in last copy everything after exit conditional
     is dead and that some instructions will be eliminated after
-   peeling.  */
+   peeling.  Set *EST_ELIMINATED to the number of stmts that could be
+   optimistically eliminated by followup transforms.  */
  static unsigned HOST_WIDE_INT
  estimated_unrolled_size (struct loop_size *size,
+                        unsigned HOST_WIDE_INT *est_eliminated,
                          unsigned HOST_WIDE_INT nunroll)
  {
    HOST_WIDE_INT unr_insns = ((nunroll)
                              * (HOST_WIDE_INT) (size->overall
                                                 - size->eliminated_by_peeling));
-  if (!nunroll)
-    unr_insns = 0;
+  HOST_WIDE_INT not_elim
+    = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
    unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
+  not_elim += size->last_iteration_not_eliminatable_after_peeling;
  
+  /* Testcases rely on rounding up, so do not write as
+     (unr_insns - not_elim) / 3.  */
+  *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
    return unr_insns;
  }
  
@@ -829,8 +849,9 @@ try_unroll_loop_completely (class loop *loop,
             }
  
           unsigned HOST_WIDE_INT ninsns = size.overall;
+         unsigned HOST_WIDE_INT est_eliminated;
           unsigned HOST_WIDE_INT unr_insns
-           = estimated_unrolled_size (&size, n_unroll);
+           = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
           if (dump_file && (dump_flags & TDF_DETAILS))
             {
               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
@@ -842,7 +863,7 @@ try_unroll_loop_completely (class loop *loop,
              cautious on guessing if the unrolling is going to be
              profitable.
              Move from estimated_unrolled_size to unroll small loops.  */
-         if (unr_insns * 2 / 3
+         if (unr_insns - est_eliminated
               /* If there is IV variable that will become constant, we
                  save one instruction in the loop prologue we do not
                  account otherwise.  */
@@ -919,7 +940,7 @@ try_unroll_loop_completely (class loop *loop,
              2) Big loop after completely unroll may not be vectorized
              by BB vectorizer.  */
           else if ((cunrolli && !loop->inner
-                   ? unr_insns : unr_insns * 2 / 3)
+                   ? unr_insns : unr_insns - est_eliminated)
                    > (unsigned) param_max_completely_peeled_insns)
             {
               if (dump_file && (dump_flags & TDF_DETAILS))
author	Richard Biener <rguenther@suse.de>
	Wed, 10 Jul 2024 10:45:02 +0000 (12:45 +0200)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Mon, 25 Nov 2024 07:17:54 +0000 (08:17 +0100)
gcc/testsuite/c-c++-common/ubsan/unreachable-3.c		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/warn/Warray-bounds-20.C		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/pr94600-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c		patch \| blob \| blame \| history
gcc/tree-ssa-loop-ivcanon.cc		patch \| blob \| blame \| history