]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fix profile update in copy-header
authorJan Hubicka <jh@suse.cz>
Sat, 1 Jul 2023 11:05:34 +0000 (13:05 +0200)
committerJan Hubicka <jh@suse.cz>
Sat, 1 Jul 2023 11:05:34 +0000 (13:05 +0200)
Most common source of profile mismatches is now copyheader pass.  The reason is that
in comon case the duplicated header condition will become constant true and that needs
changes in the loop exit condition probability.

While this can be done by jump threading it is not, since it gives up on loops.
Copy header pass now has logic to prove that first exit will become true, so this
patch adds necessary pumbing to the profile updating.
This is done in gimple_duplicate_sese_region in a way that is specific for this
particular case.  I think general case is kind-of unsolvable and loop-ch is the
only user of the infrastructure.  If we later invent some new users, maybe we
can export the region and region_copy arrays and let user to do the update.

With the patch we now get:

Pass dump id and name            |static mismat|dynamic mismatch
                                 |in count     |in count
107t cunrolli                    |      3    +3|        19237       +19237
127t ch                          |     13   +10|        19237
131t dom                         |     39   +26|        19237
133t isolate-paths               |     47    +8|        19237
134t reassoc                     |     49    +2|        19237
136t forwprop                    |     53    +4|       226943      +207706
159t cddce                       |     61    +8|       242222       +15279
161t ldist                       |     62    +1|       242222
172t ifcvt                       |     66    +4|       415472      +173250
173t vect                        |    143   +77|     10859784    +10444312
176t cunroll                     |    294  +151|    150357763   +139497979
183t loopdone                    |    291    -3|    150289533       -68230
194t tracer                      |    322   +31|    153230990     +2941457
195t fre                         |    317    -5|    153230990
197t dom                         |    286   -31|    154448079     +1217089
199t threadfull                  |    293    +7|    154724763      +276684
200t vrp                         |    297    +4|    155042448      +317685
204t dce                         |    294    -3|    155017073       -25375
206t sink                        |    292    -2|    155017073
211t cddce                       |    298    +6|    155018657        +1584
255t optimized                   |    296    -2|    155018657
256r expand                      |    273   -23|    154592622      -426035
258r into_cfglayout              |    268    -5|    154592661          +39
275r loop2_unroll                |    272    +4|    159701866     +5109205
291r ce2                         |    270    -2|    159723509
312r pro_and_epilogue            |    290   +20|    159792505       +68996
315r jump2                       |    296    +6|    164234016     +4441511
323r bbro                        |    294    -2|    159385430     -4848586

So ch introduces 10 new mismatches while originally it did 308.  At bbro the
number of mismatches dropped from 432 to 294.
Most offender is now cunroll pass. I think it is the case where loop has multiple
exits and one of exits becomes to be false in all but last peeled iteration.

This is another case where non-trivial loop update is needed.

Honza

gcc/ChangeLog:

* tree-cfg.cc (gimple_duplicate_sese_region): Add elliminated_edge
parmaeter; update profile.
* tree-cfg.h (gimple_duplicate_sese_region): Update prototype.
* tree-ssa-loop-ch.cc (entry_loop_condition_is_static): Rename to ...
(static_loop_exit): ... this; return the edge to be elliminated.
(ch_base::copy_headers): Handle profile updating for eliminated exits.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ifc-20040816-1.c: Reduce number of mismatches
from 2 to 1.
* gcc.dg/tree-ssa/loop-ch-profile-1.c: New test.
* gcc.dg/tree-ssa/loop-ch-profile-2.c: New test.

gcc/testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c
gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-2.c [new file with mode: 0644]
gcc/tree-cfg.cc
gcc/tree-cfg.h
gcc/tree-ssa-loop-ch.cc

index f8a6495cbaa7b1cb5bb4b2ecf26a5d629e58e741..b55a533e374fba04cae76b1b56c3ed6a56111760 100644 (file)
@@ -39,4 +39,4 @@ int main1 ()
    which is folded by vectorizer.  Both outgoing edges must have probability
    100% so the resulting profile match after folding.  */
 /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 2 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c
new file mode 100644 (file)
index 0000000..e8bab62
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+void foo ();
+void test(int v, int q)
+{
+       for (int i = 0; i < 10 && v/q; i++)
+               foo ();
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-2.c
new file mode 100644 (file)
index 0000000..99d22ba
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+void foo ();
+void test()
+{
+       for (int i = 0; i < 10; i++)
+               foo ();
+}
+/* We should figure out that after header dulication loop iterates 9 times.  */
+/* { dg-final { scan-tree-dump "90.00" "ch2"} } */
+/* { dg-final { scan-tree-dump "10.00" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
index 30f26af69f2878feae7edb6f807e7d212be578aa..4989906706c003abeb951cb76d37fad02fba6fdf 100644 (file)
@@ -6658,13 +6658,17 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region,
    blocks are stored to REGION_COPY in the same order as they had in REGION,
    provided that REGION_COPY is not NULL.
    The function returns false if it is unable to copy the region,
-   true otherwise.  */
+   true otherwise.
+
+   ELIMINATED_EDGE is an edge that is known to be removed in the dupicated
+   region.  */
 
 bool
 gimple_duplicate_sese_region (edge entry, edge exit,
-                           basic_block *region, unsigned n_region,
-                           basic_block *region_copy,
-                           bool update_dominance)
+                             basic_block *region, unsigned n_region,
+                             basic_block *region_copy,
+                             bool update_dominance,
+                             edge eliminated_edge)
 {
   unsigned i;
   bool free_region_copy = false, copying_header = false;
@@ -6743,11 +6747,92 @@ gimple_duplicate_sese_region (edge entry, edge exit,
            split_edge_bb_loc (entry), update_dominance);
   if (total_count.initialized_p () && entry_count.initialized_p ())
     {
-      scale_bbs_frequencies_profile_count (region, n_region,
-                                          total_count - entry_count,
-                                          total_count);
-      scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count,
-                                          total_count);
+      if (!eliminated_edge)
+       {
+         scale_bbs_frequencies_profile_count (region, n_region,
+                                              total_count - entry_count,
+                                              total_count);
+         scale_bbs_frequencies_profile_count (region_copy, n_region,
+                                              entry_count, total_count);
+       }
+      else
+       {
+         /* We only support only case where eliminated_edge is one and it
+            exists first BB.  We also assume that the duplicated region is
+            acyclic.  So we expect the following:
+
+              // region_copy_start entry will be scaled to entry_count
+                if (cond1)         <- this condition will become false
+                                      and we update probabilities
+                  goto loop_exit;
+                if (cond2)
+                  goto loop_exit;
+                goto loop_header   <- this will be redirected to loop.
+              // region_copy_end
+            loop:
+                      <body>
+              // region start
+            loop_header:
+                      if (cond1)   <- we need to update probabbility here
+                        goto loop_exit;
+                      if (cond2)   <- and determine scaling factor here.
+                        goto loop_exit;
+                      else
+                        goto loop;
+              // region end
+
+            Adding support for more exits can be done similarly,
+            but only consumer so far is tree-ssa-loop-ch and it uses only this
+            to handle the common case of peeling headers which have
+            conditionals known to be always true upon entry.  */
+         gcc_assert (eliminated_edge->src == region[0]
+                     && EDGE_COUNT (region[0]->succs) == 2
+                     && copying_header);
+
+         edge e, e_copy, eliminated_edge_copy;
+         if (EDGE_SUCC (region[0], 0) == eliminated_edge)
+           {
+             e = EDGE_SUCC (region[0], 1);
+             e_copy = EDGE_SUCC (region_copy[0], 1);
+             eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0);
+           }
+         else
+           {
+             e = EDGE_SUCC (region[0], 0);
+             e_copy = EDGE_SUCC (region_copy[0], 0);
+             eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1);
+           }
+         gcc_checking_assert (e != e_copy
+                              && eliminated_edge_copy != eliminated_edge
+                              && eliminated_edge_copy->dest
+                                 == eliminated_edge->dest);
+
+
+         /* Handle first basic block in duplicated region as in the
+            non-eliminating case.  */
+         scale_bbs_frequencies_profile_count (region_copy, n_region,
+                                              entry_count, total_count);
+         /* Now update redirecting eliminated edge to the other edge.
+            Actual CFG update is done by caller.  */
+         e_copy->probability = profile_probability::always ();
+         eliminated_edge_copy->probability = profile_probability::never ();
+         /* Header copying is a special case of jump threading, so use
+            common code to update loop body exit condition.  */
+         update_bb_profile_for_threading (region[0], e_copy->count (), e);
+         /* If we duplicated more conditionals first scale the profile of
+            rest of the preheader.  Then work out the probability of
+            entering the loop and scale rest of the loop.  */
+         if (n_region > 1)
+           {
+             scale_bbs_frequencies_profile_count (region_copy + 1,
+                                                  n_region - 1,
+                                                  e_copy->count (),
+                                                  region_copy[1]->count);
+             scale_bbs_frequencies_profile_count (region + 1, n_region - 1,
+                                                  e->count (),
+                                                  region[1]->count);
+           }
+       }
     }
 
   if (copying_header)
index 57865c58c90d96142b4d691ea50c5c2b34260899..b9ccd58c3db5cd99eb1545240fba87ae42afa3ea 100644 (file)
@@ -70,7 +70,7 @@ extern void add_phi_args_after_copy_bb (basic_block);
 extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
 extern basic_block split_edge_bb_loc (edge);
 extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
-                                       basic_block *, bool);
+                                       basic_block *, bool, edge);
 extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
                                      basic_block *);
 extern void gather_blocks_in_sese_region (basic_block entry, basic_block exit,
index 22252bee1352f9afaca7819ba0d542495e56347e..291f2dbcab91f33c0f8a8ce494b2cb687e18761b 100644 (file)
@@ -59,17 +59,18 @@ edge_range_query (irange &r, edge e, gcond *cond, gimple_ranger &ranger)
     r.set_varying (boolean_type_node);
 }
 
-/* Return true if the condition on the first iteration of the loop can
-   be statically determined.  */
+/* Return edge that is true in the first iteration of the loop
+   and NULL otherwise.  */
 
-static bool
-entry_loop_condition_is_static (class loop *l, gimple_ranger *ranger)
+static edge
+static_loop_exit (class loop *l, gimple_ranger *ranger)
 {
   edge e = loop_preheader_edge (l);
   gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (e->dest));
+  edge ret_e;
 
   if (!last)
-    return false;
+    return NULL;
 
   edge true_e, false_e;
   extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
@@ -77,17 +78,23 @@ entry_loop_condition_is_static (class loop *l, gimple_ranger *ranger)
   /* If neither edge is the exit edge, this is not a case we'd like to
      special-case.  */
   if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
-    return false;
+    return NULL;
 
   int_range<1> desired_static_range;
   if (loop_exit_edge_p (l, true_e))
-    desired_static_range = range_false ();
+   {
+      desired_static_range = range_false ();
+      ret_e = true_e;
+   }
   else
+  {
     desired_static_range = range_true ();
+    ret_e = false_e;
+  }
 
   int_range<2> r;
   edge_range_query (r, e, last, *ranger);
-  return r == desired_static_range;
+  return r == desired_static_range ? ret_e : NULL;
 }
 
 /* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
@@ -394,7 +401,8 @@ ch_base::copy_headers (function *fun)
   copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
   bbs_size = n_basic_blocks_for_fn (fun);
 
-  auto_vec<loop_p> candidates;
+  struct candidate {class loop *loop; edge static_exit;};
+  auto_vec<struct candidate> candidates;
   auto_vec<std::pair<edge, loop_p> > copied;
   auto_vec<class loop *> loops_to_unloop;
   auto_vec<int> loops_to_unloop_nunroll;
@@ -427,12 +435,14 @@ ch_base::copy_headers (function *fun)
          || !process_loop_p (loop))
        continue;
 
+      edge static_exit = static_loop_exit (loop, ranger);
+
       /* Avoid loop header copying when optimizing for size unless we can
         determine that the loop condition is static in the first
         iteration.  */
       if (optimize_loop_for_size_p (loop)
          && !loop->force_vectorize
-         && !entry_loop_condition_is_static (loop, ranger))
+         && !static_exit)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
@@ -442,13 +452,14 @@ ch_base::copy_headers (function *fun)
        }
 
       if (should_duplicate_loop_header_p (header, loop, &remaining_limit))
-       candidates.safe_push (loop);
+       candidates.safe_push ({loop, static_exit});
     }
   /* Do not use ranger after we change the IL and not have updated SSA.  */
   delete ranger;
 
-  for (auto loop : candidates)
+  for (auto candidate : candidates)
     {
+      class loop *loop = candidate.loop;
       int initial_limit = param_max_loop_header_insns;
       int remaining_limit = initial_limit;
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -501,11 +512,17 @@ ch_base::copy_headers (function *fun)
        continue;
 
       if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file,
-                "Duplicating header of the loop %d up to edge %d->%d,"
-                " %i insns.\n",
-                loop->num, exit->src->index, exit->dest->index,
-                initial_limit - remaining_limit);
+       {
+         fprintf (dump_file,
+                  "Duplicating header of the loop %d up to edge %d->%d,"
+                  " %i insns.\n",
+                  loop->num, exit->src->index, exit->dest->index,
+                  initial_limit - remaining_limit);
+         if (candidate.static_exit)
+           fprintf (dump_file,
+                    "  Will eliminate peeled conditional in bb %d.\n",
+                    candidate.static_exit->src->index);
+       }
 
       /* Ensure that the header will have just the latch as a predecessor
         inside the loop.  */
@@ -519,7 +536,7 @@ ch_base::copy_headers (function *fun)
 
       propagate_threaded_block_debug_into (exit->dest, entry->dest);
       if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
-                                        true))
+                                        true, candidate.static_exit))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Duplication failed.\n");