]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fix profile updating in ipa-cp
authorJan Hubicka <hubicka@ucw.cz>
Fri, 6 Jun 2025 15:57:00 +0000 (17:57 +0200)
committerJan Hubicka <hubicka@ucw.cz>
Mon, 9 Jun 2025 14:47:54 +0000 (16:47 +0200)
Bootstrapping with autoprofiledbootstrap, LTO and checking enabled ICEs in WPA
because we end up mixing local and IPA count in
ipa-cp.cc:update_specialized_profile.  This is because of missing call to
profile_count::adjust_for_ipa_scaling.  While looking into that I however
noticed that the function forgets to update indirect call edges. This made me
commonize the same logic which currently exists in clone_inlined_nodes,
update_specialized_profile, update_profiling_info and
update_counts_for_self_gen_clones.

While testing it I noticed that we also ICE when linking with
-fdump-ipa-all-details-blocks since IPA and local counts are temporarily mixed
during IPA transformation stage, so I also added check to profile_count::dump
to not crash and added verifier to gimple_verify_flow_info.

Other problem I also noticed is that while profile updates done by inliner (via
cgraph_node::clone) are correctly using global0 profiles instead of erasing
profile completely when IPA counts drops to 0, the scaling in ipa-cp is not
doing that, so we lose info and possibly some code quality.  I will fix that
incrementally.  Similarly, ipa-split, when offlining a region with 0 entry
count, may re-do frequency propagation to get something useful.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::apply_scale): New member function.
* cgraph.h (struct cgraph_node): declare.
* ipa-cp.cc (update_counts_for_self_gen_clones):
Use cgraph_node::apply_scale.
(update_profiling_info): Do not overwrite local
profile when dropping to 0 global profile.
(update_specialized_profile): Likewise.
* ipa-inline-transform.cc (update_noncloned_counts): Remove.
(can_remove_node_now_p_1): Fix formatting.
(clone_inlined_nodes): Use cgraph_node::apply_scale.
* profile-count.cc (profile_count::dump): Do not ICE
when count is not compatible with entry block count.
* tree-cfg.cc (gimple_verify_flow_info): Check
compatibility of count and entry block count.

gcc/cgraph.cc
gcc/cgraph.h
gcc/ipa-cp.cc
gcc/ipa-inline-transform.cc
gcc/profile-count.cc
gcc/tree-cfg.cc

index 3f95ca1fa85c33962c5e46ce4c48f3aaf8efe211..4a037a7bab10374954fe2df1764c5800dcfb24a9 100644 (file)
@@ -179,6 +179,26 @@ cgraph_node::function_version (void)
   return cgraph_fnver_htab->find (&key);
 }
 
+/* Scale profile by NUM/DEN.  Walk into inlined clones.  */
+
+void
+cgraph_node::apply_scale (profile_count num, profile_count den)
+{
+  struct cgraph_edge *e;
+
+  profile_count::adjust_for_ipa_scaling (&num, &den);
+
+  for (e = callees; e; e = e->next_callee)
+    {
+      if (!e->inline_failed)
+       e->callee->apply_scale (num, den);
+      e->count = e->count.apply_scale (num, den);
+    }
+  for (e = indirect_calls; e; e = e->next_callee)
+    e->count = e->count.apply_scale (num, den);
+  count = count.apply_scale (num, den);
+}
+
 /* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
    corresponding to cgraph_node NODE.  */
 cgraph_function_version_info *
index 8dbe36eac09df79321079d3f4fbe70c891aa6930..ba9a8a25e396964f522aedda7c5891fd5ed4d297 100644 (file)
@@ -1256,6 +1256,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
      it is not used in any other non-standard way.  */
   bool only_called_directly_p (void);
 
+  /* Scale profile by NUM/DEN.  Walk into inlined clones.  */
+  void apply_scale (profile_count num, profile_count den);
+
   /* Return true when function is only called directly or it has alias.
      i.e. it is not externally visible, address was not taken and
      it is not used in any other non-standard way.  */
index 73cf9040fad7fecb16ba1418872e85ee07fdc71c..92e234e6716285c1ab55590b784af8741df90e61 100644 (file)
@@ -4666,19 +4666,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
   unsigned i = 0;
   for (cgraph_node *n : self_gen_clones)
     {
-      profile_count orig_count = n->count;
       profile_count new_count
        = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
       new_count = lenient_count_portion_handling (new_count, orig_node);
-      n->count = new_count;
-      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
+      n->apply_scale (new_count, n->count);
       for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
-       {
-         cs->count = cs->count.apply_scale (new_count, orig_count);
-         processed_edges.add (cs);
-       }
-      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (new_count, orig_count);
+       processed_edges.add (cs);
 
       i++;
     }
@@ -4811,20 +4804,24 @@ update_profiling_info (struct cgraph_node *orig_node,
              /* The NEW_NODE count and counts of all its outgoing edges
                 are still unmodified copies of ORIG_NODE's.  Just clear
                 the latter and bail out.  */
-             profile_count zero;
               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
-                zero = profile_count::zero ().guessed_local ();
+               orig_node->count = orig_node->count.guessed_local ();
              else
-               zero = profile_count::adjusted_zero ();
-             orig_node->count = zero;
+               orig_node->count = orig_node->count.global0adjusted ();
              for (cgraph_edge *cs = orig_node->callees;
                   cs;
                   cs = cs->next_callee)
-               cs->count = zero;
+               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+                 cs->count = orig_node->count.guessed_local ();
+               else
+                 cs->count = orig_node->count.global0adjusted ();
              for (cgraph_edge *cs = orig_node->indirect_calls;
                   cs;
                   cs = cs->next_callee)
-               cs->count = zero;
+               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+                 cs->count = orig_node->count.guessed_local ();
+               else
+                 cs->count = orig_node->count.global0adjusted ();
              return;
            }
        }
@@ -4874,26 +4871,12 @@ update_profiling_info (struct cgraph_node *orig_node,
                                                orig_node);
 
   new_sum = orig_node_count.combine_with_ipa_count (new_sum);
-  new_node->count = new_sum;
   orig_node->count = remainder;
 
-  profile_count orig_new_node_count = orig_node_count;
-  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
-  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
-  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
+  new_node->apply_scale (new_sum, new_node->count);
 
   if (!orig_edges_processed)
-    {
-      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-          cs;
-          cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-    }
+    orig_node->apply_scale (remainder, orig_node->count);
 
   if (dump_file)
     {
@@ -4911,35 +4894,25 @@ update_specialized_profile (struct cgraph_node *new_node,
                            struct cgraph_node *orig_node,
                            profile_count redirected_sum)
 {
-  struct cgraph_edge *cs;
-  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
-
   if (dump_file)
     {
       fprintf (dump_file, "    the sum of counts of redirected  edges is ");
       redirected_sum.dump (dump_file);
       fprintf (dump_file, "\n    old ipa count of the original node is ");
-      orig_node_count.dump (dump_file);
+      orig_node->count.dump (dump_file);
       fprintf (dump_file, "\n");
     }
-  if (!orig_node_count.nonzero_p ())
+  if (!orig_node->count.ipa ().nonzero_p ()
+      || !redirected_sum.nonzero_p ())
     return;
 
-  new_node_count = new_node->count;
-  new_node->count += redirected_sum;
-  orig_node->count
-    = lenient_count_portion_handling (orig_node->count - redirected_sum,
-                                     orig_node);
+  orig_node->apply_scale
+    (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
+                                    orig_node),
+     orig_node->count);
 
-  for (cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
-
-  for (cs = orig_node->callees; cs; cs = cs->next_callee)
-    {
-      profile_count dec = cs->count.apply_scale (redirected_sum,
-                                                orig_node_count);
-      cs->count -= dec;
-    }
+  new_node->apply_scale (new_node->count.ipa () + redirected_sum,
+                        new_node->count);
 
   if (dump_file)
     {
index 46b8e5bb679080f45bd2efdb12aebe62cd9c26d0..3c6a84570b7f51a1dbc37aac22564028c5f33e1e 100644 (file)
@@ -58,27 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 int ncalls_inlined;
 int nfunctions_inlined;
 
-/* Scale counts of NODE edges by NUM/DEN.  */
-
-static void
-update_noncloned_counts (struct cgraph_node *node,
-                        profile_count num, profile_count den)
-{
-  struct cgraph_edge *e;
-
-  profile_count::adjust_for_ipa_scaling (&num, &den);
-
-  for (e = node->callees; e; e = e->next_callee)
-    {
-      if (!e->inline_failed)
-        update_noncloned_counts (e->callee, num, den);
-      e->count = e->count.apply_scale (num, den);
-    }
-  for (e = node->indirect_calls; e; e = e->next_callee)
-    e->count = e->count.apply_scale (num, den);
-  node->count = node->count.apply_scale (num, den);
-}
-
 /* We removed or are going to remove the last call to NODE.
    Return true if we can and want proactively remove the NODE now.
    This is important to do, since we want inliner to know when offline
@@ -93,7 +72,7 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
     {
       cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
       if ((alias->callers && alias->callers != e)
-          || !can_remove_node_now_p_1 (alias, e))
+         || !can_remove_node_now_p_1 (alias, e))
        return false;
     }
   /* FIXME: When address is taken of DECL_EXTERNAL function we still
@@ -212,7 +191,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
            }
          duplicate = false;
          e->callee->externally_visible = false;
-          update_noncloned_counts (e->callee, e->count, e->callee->count);
+         e->callee->apply_scale (e->count, e->callee->count);
 
          dump_callgraph_transformation (e->callee, inlining_into,
                                         "inlining to");
index 22c109ab528c0bcce094d484042346e5100a340b..e857cddea7eeeea0310c5b190dac1d94e69bc823 100644 (file)
@@ -94,9 +94,16 @@ profile_count::dump (FILE *f, struct function *fun) const
   else if (fun && initialized_p ()
           && fun->cfg
           && ENTRY_BLOCK_PTR_FOR_FN (fun)->count.initialized_p ())
-    fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
-            profile_quality_display_names[m_quality],
-            to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
+    {
+      if (compatible_p (ENTRY_BLOCK_PTR_FOR_FN (fun)->count))
+       fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
+                profile_quality_display_names[m_quality],
+                to_sreal_scale
+                  (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
+      else
+       fprintf (f, "%" PRId64 " (%s, incompatible with entry block count)",
+                m_val, profile_quality_display_names[m_quality]);
+    }
   else
     fprintf (f, "%" PRId64 " (%s)", m_val,
             profile_quality_display_names[m_quality]);
index b342b147716a83b3a60229fbae6c0dafb0d41a4e..fad308e7f7b7b02daa1d2695d83ba81714e944f6 100644 (file)
@@ -5757,6 +5757,12 @@ gimple_verify_flow_info (void)
       error ("probability of edge from entry block not initialized");
       err = true;
     }
+  if (!EXIT_BLOCK_PTR_FOR_FN (cfun)
+       ->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
+    {
      error ("exit block count is not compatible with entry block count");
+      err = true;
+    }
 
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -5780,6 +5786,12 @@ gimple_verify_flow_info (void)
                err = true;
              }
         }
+      if (!bb->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
+       {
         error ("count of bb %d is not compatible with entry block count",
+                bb->index);
+         err = true;
+       }
 
       /* Skip labels on the start of basic block.  */
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))