]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - gcc/tree-ssa-loop-manip.cc
Fix profile update in tree_transform_and_unroll_loop
[thirdparty/gcc.git] / gcc / tree-ssa-loop-manip.cc
index b2d3dcf406d45a8c5206fbdf7e0f20600c8a2b6a..8e3b1057b6ff67e870d69e44de94f8a19cef3de5 100644 (file)
@@ -1040,71 +1040,6 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
   *exit_bound = bound;
 }
 
-/* Scales the frequencies of all basic blocks in LOOP that are strictly
-   dominated by BB by NUM/DEN.  */
-
-static void
-scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
-                               profile_count num, profile_count den)
-{
-  basic_block son;
-
-  if (!den.nonzero_p () && !(num == profile_count::zero ()))
-    return;
-
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-       son;
-       son = next_dom_son (CDI_DOMINATORS, son))
-    {
-      if (!flow_bb_inside_loop_p (loop, son))
-       continue;
-      scale_bbs_frequencies_profile_count (&son, 1, num, den);
-      scale_dominated_blocks_in_loop (loop, son, num, den);
-    }
-}
-
-/* Return estimated niter for LOOP after unrolling by FACTOR times.  */
-
-gcov_type
-niter_for_unrolled_loop (class loop *loop, unsigned factor)
-{
-  gcc_assert (factor != 0);
-  bool profile_p = false;
-  gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
-  /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
-     "+ 1" converts latch iterations to loop iterations and the "- 1"
-     converts back.  */
-  gcov_type new_est_niter = est_niter / factor;
-
-  if (est_niter == -1)
-    return -1;
-
-  /* Without profile feedback, loops for which we do not know a better estimate
-     are assumed to roll 10 times.  When we unroll such loop, it appears to
-     roll too little, and it may even seem to be cold.  To avoid this, we
-     ensure that the created loop appears to roll at least 5 times (but at
-     most as many times as before unrolling).  Don't do adjustment if profile
-     feedback is present.  */
-  if (new_est_niter < 5 && !profile_p)
-    {
-      if (est_niter < 5)
-       new_est_niter = est_niter;
-      else
-       new_est_niter = 5;
-    }
-
-  if (loop->any_upper_bound)
-    {
-      /* As above, this is really CEIL (upper_bound + 1, factor) - 1.  */
-      widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
-                                        factor);
-      if (wi::ltu_p (bound, new_est_niter))
-       new_est_niter = bound.to_uhwi ();
-    }
-
-  return new_est_niter;
-}
-
 /* Unroll LOOP FACTOR times.  LOOP is known to have a single exit edge
    whose source block dominates the latch.  DESC describes the number of
    iterations of LOOP.
@@ -1169,47 +1104,39 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
                                transform_callback transform,
                                void *data)
 {
-  gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
   unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
 
   enum tree_code exit_cmp;
   tree enter_main_cond, exit_base, exit_step, exit_bound;
+  bool flat = maybe_flat_loop_profile (loop);
   determine_exit_conditions (loop, desc, factor,
                             &enter_main_cond, &exit_base, &exit_step,
                             &exit_cmp, &exit_bound);
   bool single_loop_p = !exit_base;
 
-  /* Let us assume that the unrolled loop is quite likely to be entered.  */
-  profile_probability prob_entry;
-  if (integer_nonzerop (enter_main_cond))
-    prob_entry = profile_probability::always ();
-  else
-    prob_entry = profile_probability::guessed_always ()
-                       .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
-
   gcond *exit_if = nullptr;
   class loop *new_loop = nullptr;
   edge new_exit;
   if (!single_loop_p)
     {
-      edge exit = single_dom_exit (loop);
+      profile_count entry_count = loop_preheader_edge (loop)->src->count;
+      /* Let us assume that the unrolled loop is quite likely to be entered.  */
+      profile_probability prob_entry;
+      if (integer_nonzerop (enter_main_cond))
+       prob_entry = profile_probability::always ();
+      else
+       prob_entry = profile_probability::guessed_always ()
+                           .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
+
 
       /* The values for scales should keep profile consistent, and somewhat
-        close to correct.
-
-        TODO: The current value of SCALE_REST makes it appear that the loop
-        that is created by splitting the remaining iterations of the unrolled
-        loop is executed the same number of times as the original loop, and
-        with the same frequencies, which is obviously wrong.  This does not
-        appear to cause problems, so we do not bother with fixing it for now.
-        To make the profile correct, we would need to change the probability
-        of the exit edge of the loop, and recompute the distribution of
-        frequencies in its body because of this change (scale the frequencies
-        of blocks before and after the exit by appropriate factors).  */
-      profile_probability scale_unrolled = prob_entry;
+        close to correct.  */
       new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
-                              prob_entry.invert (), scale_unrolled,
-                              profile_probability::guessed_always (),
+                              prob_entry.invert (),
+                              prob_entry,
+                              /* We will later redirect exit from vectorized
+                                 loop to new_loop.  */
+                              profile_probability::always (),
                               true);
       gcc_assert (new_loop != NULL);
       update_ssa (TODO_update_ssa_no_phi);
@@ -1220,18 +1147,16 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       edge precond_edge = single_pred_edge (rest);
       split_edge (loop_latch_edge (loop));
       basic_block exit_bb = single_pred (loop->latch);
+      edge exit = single_dom_exit (loop);
 
       /* Since the exit edge will be removed, the frequency of all the blocks
-        in the loop that are dominated by it must be scaled by
-        1 / (1 - exit->probability).  */
+        in the loop that are dominated by it must be scaled.  */
       if (exit->probability.initialized_p ())
        scale_dominated_blocks_in_loop (loop, exit->src,
                                        /* We are scaling up here so
                                           probability does not fit.  */
-                                       loop->header->count,
-                                       loop->header->count
-                                       - loop->header->count.apply_probability
-                                           (exit->probability));
+                                       exit->src->count,
+                                       exit->src->count - exit->count ());
 
       gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
       exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
@@ -1243,14 +1168,14 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       rescan_loop_exit (new_exit, true, false);
 
       /* Set the probability of new exit to the same of the old one.  Fix
-        the frequency of the latch block, by scaling it back by
-        1 - exit->probability.  */
+        the count of the latch block.  */
       new_exit->probability = exit->probability;
       edge new_nonexit = single_pred_edge (loop->latch);
       new_nonexit->probability = exit->probability.invert ();
       new_nonexit->flags = EDGE_TRUE_VALUE;
-      if (new_nonexit->probability.initialized_p ())
-       scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
+      set_edge_probability_and_rescale_others
+             (exit, profile_probability::never ());
+      loop->latch->count = new_nonexit->count ();
 
       edge old_entry = loop_preheader_edge (loop);
       edge new_entry = loop_preheader_edge (new_loop);
@@ -1296,12 +1221,21 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
        }
 
       remove_path (exit);
+      /* We will later redirect exit from vectorized loop to new_loop.  */
+      loop_preheader_edge (new_loop)->src->count = entry_count;
 
       /* The epilog loop latch executes at most factor - 1 times.
         Since the epilog is entered unconditionally it will need to handle
         up to factor executions of its body.  */
-      new_loop->any_upper_bound = 1;
+      new_loop->any_upper_bound = true;
       new_loop->nb_iterations_upper_bound = factor - 1;
+      /* We do not really know estimate on number of iterations, since we do not
+        track any estimates modulo unroll factor.
+        Drop estimate from loop_info and scale loop profile.
+        It may be more realistic to scale loop profile to factor / 2 - 1,
+        but vectorizer also uses factor - 1.  */
+      new_loop->any_estimate = false;
+      scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
     }
   else
     new_exit = single_dom_exit (loop);
@@ -1318,10 +1252,10 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
 
   auto_vec<edge> to_remove;
   bool ok
-    = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
-                                                factor - 1, wont_exit,
-                                                new_exit, &to_remove,
-                                                DLTHE_FLAG_UPDATE_FREQ);
+    = gimple_duplicate_loop_body_to_header_edge
+           (loop, loop_latch_edge (loop), factor - 1, wont_exit,
+            new_exit, &to_remove,
+            DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
   gcc_assert (ok);
 
   for (edge e : to_remove)
@@ -1332,36 +1266,25 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   update_ssa (TODO_update_ssa);
 
   new_exit = single_dom_exit (loop);
+
+  /* gimple_duplicate_loop_body_to_header_edge depending on
+     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+     or scales it down accordingly.
+     However exit edge probability is kept as original.  Fix it if needed
+     and compensate.  */
+  profile_probability new_prob
+         = loop_preheader_edge
+                 (loop)->count ().probability_in (new_exit->src->count);
+  if (!(new_prob == new_exit->probability))
+    {
+      profile_count old_count = new_exit->src->count - new_exit->count ();
+      set_edge_probability_and_rescale_others (new_exit, new_prob);
+      profile_count new_count = new_exit->src->count - new_exit->count ();
+      scale_dominated_blocks_in_loop (loop, new_exit->src,
+                                     new_count, old_count);
+    }
   if (!single_loop_p)
     {
-      /* Ensure that the frequencies in the loop match the new estimated
-        number of iterations, and change the probability of the new
-        exit edge.  */
-
-      profile_count freq_h = loop->header->count;
-      profile_count freq_e = (loop_preheader_edge (loop))->count ();
-      if (freq_h.nonzero_p ())
-       {
-         /* Avoid dropping loop body profile counter to 0 because of zero
-            count in loop's preheader.  */
-         if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
-           freq_e = freq_e.force_nonzero ();
-         scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
-       }
-
-      basic_block rest = new_exit->dest;
-      new_exit->probability
-       = (profile_probability::always () / (new_est_niter + 1));
-
-      rest->count += new_exit->count ();
-
-      edge new_nonexit = single_pred_edge (loop->latch);
-      profile_probability prob = new_nonexit->probability;
-      new_nonexit->probability = new_exit->probability.invert ();
-      prob = new_nonexit->probability / prob;
-      if (prob.initialized_p ())
-       scale_bbs_frequencies (&loop->latch, 1, prob);
-
       /* Finally create the new counter for number of iterations and add
         the new exit instruction.  */
       tree ctr_before, ctr_after;
@@ -1374,66 +1297,15 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       gimple_cond_set_rhs (exit_if, exit_bound);
       update_stmt (exit_if);
     }
-  else
-    {
-      /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
-        original profile counts in line with the unroll factor.  However,
-        the old counts might not have been consistent with the old
-        iteration count.
-
-        Therefore, if the iteration count is known exactly, make sure that the
-        profile counts of the loop header (and any other blocks that might be
-        executed in the final iteration) are consistent with the combination
-        of (a) the incoming profile count and (b) the new iteration count.  */
-      profile_count in_count = loop_preheader_edge (loop)->count ();
-      profile_count old_header_count = loop->header->count;
-      if (in_count.nonzero_p ()
-         && old_header_count.nonzero_p ()
-         && TREE_CODE (desc->niter) == INTEGER_CST)
-       {
-         /* The + 1 converts latch counts to iteration counts.  */
-         profile_count new_header_count = in_count * (new_est_niter + 1);
-         basic_block *body = get_loop_body (loop);
-         scale_bbs_frequencies_profile_count (body, loop->num_nodes,
-                                              new_header_count,
-                                              old_header_count);
-         free (body);
-       }
-
-      /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
-        exit edges and adjusted the loop body's profile counts for the
-        new probabilities of the remaining non-exit edges.  However,
-        the remaining exit edge still has the same probability as it
-        did before, even though it is now more likely.
-
-        Therefore, all blocks executed after a failed exit test now have
-        a profile count that is too high, and the sum of the profile counts
-        for the header's incoming edges is greater than the profile count
-        of the header itself.
-
-        Adjust the profile counts of all code in the loop body after
-        the exit test so that the sum of the counts on entry to the
-        header agree.  */
-      profile_count old_latch_count = loop_latch_edge (loop)->count ();
-      profile_count new_latch_count = loop->header->count - in_count;
-      if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
-       scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
-                                       old_latch_count);
-
-      /* Set the probability of the exit edge based on NEW_EST_NITER
-        (which estimates latch counts rather than iteration counts).
-        Update the probabilities of other edges to match.
-
-        If the profile counts are large enough to give the required
-        precision, the updates above will have made
-
-           e->dest->count / e->src->count ~= new e->probability
-
-        for every outgoing edge e of NEW_EXIT->src.  */
-      profile_probability new_exit_prob
-       = profile_probability::always () / (new_est_niter + 1);
-      change_edge_frequency (new_exit, new_exit_prob);
-    }
+  if (loop->any_upper_bound)
+    loop->nb_iterations_upper_bound = wi::udiv_floor
+               (loop->nb_iterations_upper_bound + 1, factor) - 1;
+  if (loop->any_likely_upper_bound)
+    loop->nb_iterations_likely_upper_bound = wi::udiv_floor
+               (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
+  if (loop->any_estimate)
+    loop->nb_iterations_estimate = wi::udiv_floor
+               (loop->nb_iterations_estimate + 1, factor) - 1;
 
   checking_verify_flow_info ();
   checking_verify_loop_structure ();