Add cutoff information to profile_info and use it when forcing non-zero value

author Jan Hubicka <hubicka@ucw.cz>

Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)

committer Jan Hubicka <hubicka@ucw.cz>

Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)
author Jan Hubicka <hubicka@ucw.cz>
Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)
committer Jan Hubicka <hubicka@ucw.cz>
Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc

index 64f4cda1b52d6e74287c3ae8b2949b659972c3a7..a970eb8972fadd133135d2feff1b8c3a9dd940e8 100644 (file)
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -2522,6 +2522,7 @@ autofdo_source_profile::read ()
      afdo_count_scale
        = MAX (((gcov_type)1 << (profile_count::n_bits / 2))
              / afdo_profile_info->sum_max, 1);
+  afdo_profile_info->cutoff *= afdo_count_scale;
    afdo_hot_bb_threshod
      = hot_frac
        ? afdo_profile_info->sum_max * afdo_count_scale / hot_frac
@@ -2531,10 +2532,12 @@ autofdo_source_profile::read ()
      fprintf (dump_file, "Max count in profile %" PRIu64 "\n"
                         "Setting scale %" PRIu64 "\n"
                         "Scaled max count %" PRIu64 "\n"
+                       "Cutoff %" PRIu64 "\n"
                         "Hot count threshold %" PRIu64 "\n\n",
              (int64_t)afdo_profile_info->sum_max,
              (int64_t)afdo_count_scale,
              (int64_t)(afdo_profile_info->sum_max * afdo_count_scale),
+            (int64_t)afdo_profile_info->cutoff,
              (int64_t)afdo_hot_bb_threshod);
    afdo_profile_info->sum_max *= afdo_count_scale;
    return true;
@@ -3865,6 +3868,7 @@ read_autofdo_file (void)
    autofdo::afdo_profile_info = XNEW (gcov_summary);
    autofdo::afdo_profile_info->runs = 1;
    autofdo::afdo_profile_info->sum_max = 0;
+  autofdo::afdo_profile_info->cutoff = 1;
  
    /* Read the profile from the profile file.  */
    autofdo::read_profile ();
diff --git a/gcc/coverage.cc b/gcc/coverage.cc

index dd3ed2ed8429c5e97a1ffcae81a6146e37de80cb..75a24c614486c5d87dbd1fa05ef4f85a76600f3f 100644 (file)
--- a/gcc/coverage.cc
+++ b/gcc/coverage.cc
@@ -238,6 +238,7 @@ read_counts_file (void)
           gcov_profile_info = profile_info = XCNEW (gcov_summary);
           profile_info->runs = gcov_read_unsigned ();
           profile_info->sum_max = gcov_read_unsigned ();
+         profile_info->cutoff = 1;
         }
        else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident)
         {
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h

index d48291c1fe353e98ddced0101b68fa57e4065638..f3e3a1c08da8b5a327c0fe12635c4db31a40edf4 100644 (file)
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -349,6 +349,11 @@ struct gcov_summary
  {
    gcov_unsigned_t runs;                /* Number of program runs.  */
    gcov_type sum_max;           /* Sum of individual run max values.  */
+  gcov_type cutoff;            /* Values smaller than this value are not
+                                  reliable (0 may mean non-zero).
+                                  For a profile as read in, the cutoff is
+                                  typically 1; however, when we scale up
+                                  or use auto-fdo it may become larger.  */
  };
  
  #if !defined(inhibit_libc)
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc

index ca605b027dcfdeb09d8ed7611cb21da0349566dc..0cf97a80687e5d1a3f67db72c63e2e110bb59e4e 100644 (file)
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -2222,6 +2222,7 @@ inline_small_functions (void)
  
    gcc_assert (in_lto_p
               || !(max_count > 0)
+             || flag_auto_profile
               || (profile_info && flag_branch_probabilities));
  
    while (!edge_heap.empty ())
diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc

index ec34f659d6a4b30c30e656fca01006a5216044ac..0af2e889af85b343eabbb671c4a8b445ed5d81c3 100644 (file)
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -718,11 +718,12 @@ output_profile_summary (struct lto_simple_output_block *ob)
  {
    if (profile_info)
      {
-      /* We do not output num and run_max, they are not used by
-         GCC profile feedback and they are difficult to merge from multiple
-         units.  */
        unsigned runs = (profile_info->runs);
        streamer_write_uhwi_stream (ob->main_stream, runs);
+      streamer_write_gcov_count_stream (ob->main_stream,
+                                       profile_info->sum_max);
+      streamer_write_gcov_count_stream (ob->main_stream,
+                                       profile_info->cutoff);
  
        /* IPA-profile computes hot bb threshold based on cumulated
          whole program profile.  We need to stream it down to ltrans.  */
@@ -1678,6 +1679,8 @@ input_profile_summary (class lto_input_block *ib,
    if (runs)
      {
        file_data->profile_info.runs = runs;
+      file_data->profile_info.sum_max = streamer_read_gcov_count (ib);
+      file_data->profile_info.cutoff = streamer_read_gcov_count (ib);
  
        /* IPA-profile computes hot bb threshold based on cumulated
          whole program profile.  We need to stream it down to ltrans.  */
@@ -1719,6 +1722,8 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
  
    profile_info = XCNEW (gcov_summary);
    profile_info->runs = max_runs;
+  profile_info->sum_max = 0;
+  profile_info->cutoff = 0;
  
    /* If merging already happent at WPA time, we are done.  */
    if (flag_ltrans)
@@ -1735,6 +1740,14 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
  
         scale = RDIV (node->count_materialization_scale * max_runs,
                        node->lto_file_data->profile_info.runs);
+       gcov_type sum_max = RDIV (node->lto_file_data->profile_info.sum_max * max_runs,
+                                 node->lto_file_data->profile_info.runs);
+       gcov_type cutoff = RDIV (node->lto_file_data->profile_info.cutoff * max_runs,
+                                node->lto_file_data->profile_info.runs);
+       if (sum_max > profile_info->sum_max)
+         profile_info->sum_max = sum_max;
+       if (cutoff > profile_info->cutoff)
+         profile_info->cutoff = cutoff;
         node->count_materialization_scale = scale;
         if (scale < 0)
           fatal_error (input_location, "Profile information in %s corrupted",
diff --git a/gcc/profile-count.cc b/gcc/profile-count.cc

index 21477008b702654540647570b66161b31129226d..8f05a79a43722f876e317aa87d413a8b3f2dfc41 100644 (file)
--- a/gcc/profile-count.cc
+++ b/gcc/profile-count.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "cgraph.h"
  #include "wide-int.h"
  #include "sreal.h"
+#include "profile.h"
  
  /* Names from profile_quality enum values.  */
  
@@ -570,3 +571,27 @@ profile_count::operator*= (const sreal &num)
  {
    return *this * num;
  }
+
+/* Make counter forcibly nonzero.  */
+profile_count
+profile_count::force_nonzero () const
+{
+  if (!initialized_p ())
+    return *this;
+  profile_count ret = *this;
+  /* Generally values are forced non-zero to handle an inconsistent profile
+     where count 0 needs to be scaled up to non-zero.
+
+     Use the cutoff value here to avoid the situation where the profile has
+     a large cutoff and we perform count = count * num / den where num is
+     non-zero and den is 0.  If the profile was scaled by a large factor,
+     forcing the value to 1 would lead to a large scale factor.
+     The threshold is kept 64-bit so a large cutoff is not narrowed.  */
+  uint64_t small = profile_info ? profile_info->cutoff / 2 + 1 : 1;
+  if (ret.m_val < small)
+    {
+      ret.m_val = small;
+      ret.m_quality = MIN (m_quality, ADJUSTED);
+    }
+  return ret;
+}
diff --git a/gcc/profile-count.h b/gcc/profile-count.h

index 216054033c52172c2960cab2c4c89f5ed48d0061..20c03a292382ef65565976a7f5e4f5ed9f756e3d 100644 (file)
--- a/gcc/profile-count.h
+++ b/gcc/profile-count.h
@@ -1112,18 +1112,7 @@ public:
      }
  
    /* Make counter forcibly nonzero.  */
-  profile_count force_nonzero () const
-    {
-      if (!initialized_p ())
-       return *this;
-      profile_count ret = *this;
-      if (ret.m_val == 0)
-       {
-         ret.m_val = 1;
-         ret.m_quality = MIN (m_quality, ADJUSTED);
-       }
-      return ret;
-    }
+  profile_count force_nonzero () const;
  
    profile_count max (profile_count other) const
      {
diff --git a/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c b/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c

index 43a909054b50cdebe5049396b5048247d9ebf369..904dd0cfb28afdf6d7966da2c4de29a97e16f289 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c
@@ -31,4 +31,4 @@ int main()
  }
  /* We will have profiles for test2 and test2.constprop.0 that will have to be
     merged,  */
-/* { dg-final-use-autofdo { scan-ipa-dump "Merging duplicate symbol test2" "afdo_offline"} } */
+/* { dg-final-use-autofdo { scan-ipa-dump "Merging duplicate instance: test2" "afdo_offline"} } */
author	Jan Hubicka <hubicka@ucw.cz>
	Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)
committer	Jan Hubicka <hubicka@ucw.cz>
	Sun, 6 Jul 2025 12:42:54 +0000 (14:42 +0200)
gcc/auto-profile.cc		patch \| blob \| blame \| history
gcc/coverage.cc		patch \| blob \| blame \| history
gcc/gcov-io.h		patch \| blob \| blame \| history
gcc/ipa-inline.cc		patch \| blob \| blame \| history
gcc/lto-cgraph.cc		patch \| blob \| blame \| history
gcc/profile-count.cc		patch \| blob \| blame \| history
gcc/profile-count.h		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c		patch \| blob \| blame \| history