git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR tree-optimization/68398 (coremark regression due to r229685)
author Jeff Law <law@redhat.com>
Wed, 27 Jan 2016 19:19:47 +0000 (12:19 -0700)
committer Jeff Law <law@gcc.gnu.org>
Wed, 27 Jan 2016 19:19:47 +0000 (12:19 -0700)
PR tree-optimization/68398
* params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter.
(PARAM_FSM_SCALE_PATH_BLOCKS): Likewise.
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Only count PHIs in the last block in the path.  The others will
const/copy propagate away.  Add heuristic to allow more irreducible
subloops to be created when it is likely profitable to do so.

* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Fix typo in comment.  Use gsi_after_labels and remove the GIMPLE_LABEL
check from within the loop.  Use gsi_next_nondebug rather than gsi_next.

PR tree-optimization/68398
* gcc.dg/tree-ssa/pr66752-3.c: Update expected output.
* gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread
path to avoid new heuristic allowing more irreducible regions.
* gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise.
* gcc.dg/tree-ssa/vrp46.c: Likewise.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output.
* gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test.
* gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise.

From-SVN: r232897

gcc/ChangeLog
gcc/params.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr66752-3.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2c.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2d.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2g.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2h.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
gcc/testsuite/gcc.dg/tree-ssa/vrp46.c
gcc/tree-ssa-threadbackward.c

index 197306031a78189e8c20cbcb295e59680f2a2412..76c7af2e6ef3ba6868462ba300746e5bd01e17e0 100644 (file)
@@ -1,3 +1,18 @@
+2016-01-27  Jeff Law  <law@redhat.com>
+
+       PR tree-optimization/68398
+       PR tree-optimization/69196
+       * params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter.
+       (PARAM_FSM_SCALE_PATH_BLOCKS): Likewise.
+       * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+       Only count PHIs in the last block in the path.  The others will
+       const/copy propagate away.  Add heuristic to allow more irreducible
+       subloops to be created when it is likely profitable to do so.
+
+       * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+       Fix typo in comment.  Use gsi_after_labels and remove the GIMPLE_LABEL
+       check from within the loop.  Use gsi_next_nondebug rather than gsi_next.
+
 2016-01-27  Jakub Jelinek  <jakub@redhat.com>
 
        PR lto/69254
index 88971c7c0617296eeaff208700f3d597cd73fcd8..0722ad7d9898c269265be79db05d1d18ec8835ec 100644 (file)
@@ -1145,6 +1145,16 @@ DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE,
          "constructor generated by Pointer Bounds Checker.",
          5000, 100, 0)
 
+DEFPARAM (PARAM_FSM_SCALE_PATH_STMTS,
+         "fsm-scale-path-stmts",
+         "Scale factor to apply to the number of statements in a threading path when comparing to the number of (scaled) blocks.",
+         2, 1, 10)
+
+DEFPARAM (PARAM_FSM_SCALE_PATH_BLOCKS,
+         "fsm-scale-path-blocks",
+         "Scale factor to apply to the number of blocks in a threading path when comparing to the number of (scaled) statements.",
+         3, 1, 10)
+
 DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
          "max-fsm-thread-path-insns",
          "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path.",
index 2cad0c2585bde363a48d022ad92f0e3142c27d39..22a124a1ab396e166ae9d7d9a4ab60c426969cd7 100644 (file)
@@ -1,3 +1,16 @@
+2016-01-25  Jeff Law  <law@redhat.com>
+
+       PR tree-optimization/68398
+       PR tree-optimization/69196
+       * gcc.dg/tree-ssa/pr66752-3.c: Update expected output.
+       * gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread
+       path to avoid new heuristic allowing more irreducible regions
+       * gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise.
+       * gcc.dg/tree-ssa/vrp46.c: Likewise.
+       * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output.
+       * gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test.
+       * gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise.
+
 2016-01-27  Marek Polacek  <polacek@redhat.com>
 
        PR c/68062
index 6eeaca50c5fdb8c70b8e99aa5e6c2d9f43cb3f38..2949cbb5158d6a9e7367c20e854acfa03f91720f 100644 (file)
@@ -32,10 +32,9 @@ foo (int N, int c, int b, int *a)
    pt--;
 }
 
-/* There are 3 FSM jump threading opportunities, one of which will
-   get filtered.  */
-/* { dg-final { scan-tree-dump-times "Registering FSM" 2 "vrp1"} } */
-/* { dg-final { scan-tree-dump-times "FSM would create irreducible loop" 1 "vrp1"} } */
+/* There are 3 FSM jump threading opportunities, all of which will be
+   realized, which will eliminate testing of FLAG completely.  */
+/* { dg-final { scan-tree-dump-times "Registering FSM" 3 "vrp1"} } */
 
 /* There should be no assignments or references to FLAG.  */
 /* { dg-final { scan-tree-dump-not "flag" "optimized"} } */
index f3e37bd8d383fcbe9018763c54e5c925a87d75cc..f91467614c0794bfdf148bba7bf7265bbf5bd7ae 100644 (file)
@@ -1,4 +1,4 @@
-/* { dg-do compile } */ 
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
 
 void foo();
@@ -15,6 +15,9 @@ void dont_thread_1 (void)
 
   do
     {
+      bla ();
+      bla ();
+      bla ();
       if (first)
        foo ();
       else
index 9b4637a22a03980edb28ec33193201d7b52900d9..a04aabf2215bea60bb4d3a4680ba481e3bd302ce 100644 (file)
@@ -1,4 +1,4 @@
-/* { dg-do compile } */ 
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
 
 void foo();
@@ -13,6 +13,9 @@ void dont_thread_2 (int first)
 
   do
     {
+      bla ();
+      bla ();
+      bla ();
       if (first)
        foo ();
       else
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2g.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2g.c
new file mode 100644 (file)
index 0000000..6d1ff5d
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
+
+void foo();
+void bla();
+void bar();
+
+void dont_thread_1 (void)
+{
+  int i = 0;
+  int first = 1;
+
+  do
+    {
+      if (first)
+       foo ();
+      else
+       bar ();
+
+      first = 0;
+      bla ();
+    } while (i++ < 100);
+}
+
+/* { dg-final { scan-tree-dump "Jumps threaded: 2" "vrp1"} } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2h.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2h.c
new file mode 100644 (file)
index 0000000..61705e1
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
+
+void foo();
+void bla();
+void bar();
+
+/* Avoid threading in the following case, to prevent creating subloops.  */
+
+void dont_thread_2 (int first)
+{
+  int i = 0;
+
+  do
+    {
+      if (first)
+       foo ();
+      else
+       bar ();
+
+      first = 0;
+      bla ();
+    } while (i++ < 100);
+}
+
+/* Peeling off the first iteration would make threading through
+   the loop latch safe, but we don't do that currently.  */
+/* { dg-final { scan-tree-dump "Jumps threaded: 1" "vrp1"} } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
index 93c767c352cc12a18a9a5a5cecc85d004d9ed228..a7a737be0ad38b821ee71d2c23f88dbea543f523 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats" } */
-/* { dg-final { scan-tree-dump "Jumps threaded: 7"  "vrp1" } } */
+/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats" } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 19"  "vrp1" } } */
 /* { dg-final { scan-tree-dump "Jumps threaded: 12" "dom2" } } */
-/* { dg-final { scan-tree-dump "Jumps threaded: 3"  "dom3" } } */
+/* { dg-final { scan-tree-dump-not "Jumps threaded"  "dom3" } } */
+/* { dg-final { scan-tree-dump-not "Jumps threaded"  "vrp2" } } */
 
 enum STATE {
   S0=0,
index 64c65d0b193858983d81e38721e2b72256343302..8923eb404d3df06b0e1c00b0d4cb945ed8dd289e 100644 (file)
@@ -12,6 +12,8 @@ func_18 ( int t )
   for (0; 1; ++l_889)
     {
       int t1 = 0;
+      func_98 (0);
+      func_98 (0);
       if (func_81 (1))
        {
          int rhs = l_895;
index 131630e160334a4b29d8eceafe9ff184909c9203..735009cb702c1fe31ad18e94ceb671e6a48c7a39 100644 (file)
@@ -266,7 +266,7 @@ fsm_find_control_statement_thread_paths (tree name,
          basic_block bb = (*path)[j];
 
          /* Remember, blocks in the path are stored in opposite order
-            in the PATH array.  The last entry in the array reprensents
+            in the PATH array.  The last entry in the array represents
             the block with an outgoing edge that we will redirect to the
             jump threading path.  Thus we don't care about that block's
             loop father, nor how many statements are in that block because
@@ -280,33 +280,19 @@ fsm_find_control_statement_thread_paths (tree name,
                  break;
                }
 
-             for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+             for (gsi = gsi_after_labels (bb);
+                  !gsi_end_p (gsi);
+                  gsi_next_nondebug (&gsi))
                {
                  gimple *stmt = gsi_stmt (gsi);
                  /* Do not count empty statements and labels.  */
                  if (gimple_code (stmt) != GIMPLE_NOP
-                     && gimple_code (stmt) != GIMPLE_LABEL
                      && !(gimple_code (stmt) == GIMPLE_ASSIGN
                           && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
                      && !is_gimple_debug (stmt))
                    ++n_insns;
                }
 
-             gphi_iterator gsip;
-             for (gsip = gsi_start_phis (bb);
-                  !gsi_end_p (gsip);
-                  gsi_next (&gsip))
-               {
-                 gphi *phi = gsip.phi ();
-                 tree dst = gimple_phi_result (phi);
-
-                 /* We consider any non-virtual PHI as a statement since it
-                    count result in a constant assignment or copy
-                    operation.  */
-                 if (!virtual_operand_p (dst))
-                   ++n_insns;
-               }
-
              /* We do not look at the block with the threaded branch
                 in this loop.  So if any block with a last statement that
                 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
@@ -360,6 +346,24 @@ fsm_find_control_statement_thread_paths (tree name,
              == DOMST_NONDOMINATING))
        creates_irreducible_loop = true;
 
+      /* PHIs in the final target and only the final target will need
+        to be duplicated.  So only count those against the number
+        of statements.  */
+      gphi_iterator gsip;
+      for (gsip = gsi_start_phis (taken_edge->dest);
+          !gsi_end_p (gsip);
+          gsi_next (&gsip))
+       {
+         gphi *phi = gsip.phi ();
+         tree dst = gimple_phi_result (phi);
+
+         /* We consider any non-virtual PHI as a statement since it
+            could result in a constant assignment or copy
+            operation.  */
+         if (!virtual_operand_p (dst))
+           ++n_insns;
+       }
+
       if (path_crosses_loops)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
@@ -379,10 +383,18 @@ fsm_find_control_statement_thread_paths (tree name,
          continue;
        }
 
-      /* We avoid creating irreducible loops unless we thread through
+      /* We avoid creating irreducible inner loops unless we thread through
         a multiway branch, in which case we have deemed it worth losing other
-        loop optimizations later.  */
-      if (!threaded_multiway_branch && creates_irreducible_loop)
+        loop optimizations later.
+
+        We also consider it worth creating an irreducible inner loop if
+        the number of copied statements is low relative to the length of
+        the path -- in that case there's little the traditional loop optimizer
+        would have done anyway, so an irreducible loop is not so bad.  */
+      if (!threaded_multiway_branch && creates_irreducible_loop
+         && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
+             > path_length * PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS)))
+
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,