From: Jan Hubicka Date: Thu, 4 Sep 2025 15:23:20 +0000 (+0200) Subject: Fix scaling of auto-fdo profiles in inliner X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1da3c4d90e678af0fed89c5638c97a41e5e04547;p=thirdparty%2Fgcc.git Fix scaling of auto-fdo profiles in inliner With auto-fdo it is possible that function bar with non-zero profile is inlined into foo with zero profile and foo is the only caller of it. In this case we currently scale bar to also have zero profile which makes it optimized for size. With normal profiles this does not happen, since basic blocks with non-zero count must have some way to be reached. This patch makes the inliner scale the caller in this case which mitigates the problem (to some degree). Bootstrapped/regtested x86_64-linux, plan to commit it shortly. gcc/ChangeLog: * ipa-inline-transform.cc (inline_call): If function with AFDO profile is inlined into function with GUESSED_GLOBAL0_AFDO or GUESSED_GLOBAL0_ADJUSTED, scale caller to AFDO profile. * profile-count.h (profile_count::apply_scale): If num is AFDO and den is not GUESSED, make result AFDO rather than GUESSED. --- diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index 9d759d218b5..5c244bc17ac 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -344,6 +344,40 @@ inline_call (struct cgraph_edge *e, bool update_original, to = e->caller; if (to->inlined_to) to = to->inlined_to; + + /* In case callee has AFDO profile but caller has GLOBAL0 we need + to re-scale it so it can have non-zero AFDO profile. 
*/ + if (callee->count.quality () == AFDO + && e->count.nonzero_p () + && (to->count.quality () == GUESSED_GLOBAL0_AFDO + || to->count.quality () == GUESSED_GLOBAL0_ADJUSTED)) + { + profile_count num = callee->count; + profile_count den = e->count; + profile_count::adjust_for_ipa_scaling (&num, &den); + if (dump_file) + { + fprintf (dump_file, "Rescalling profile of caller %s " + "to allow non-zero AFDO counts:", + to->dump_name ()); + den.dump (dump_file); + fprintf (dump_file, " -> "); + num.dump (dump_file); + fprintf (dump_file, "\n"); + } + to->apply_scale (num, den); + to->frequency = std::max (to->frequency, callee->frequency); + /* Do not update original, so possible additional calls of callee + are handled reasonably well. */ + update_original = false; + gcc_checking_assert (to->count.quality () == AFDO); + if (dump_file) + { + fprintf (dump_file, "Scaled profile of %s: ", to->dump_name ()); + to->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } if (to->thunk) { struct cgraph_node *target = to->callees->callee; diff --git a/gcc/profile-count.h b/gcc/profile-count.h index c893aec577c..65c4596a2b0 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -1212,8 +1212,18 @@ public: /* Be sure that ret is not local if num is global. Also ensure that ret is not global0 when num is global. */ if (num.ipa_p ()) - ret.m_quality = MAX (ret.m_quality, - num == num.ipa () ? GUESSED : num.m_quality); + { + /* This is common case of AFDO scaling when we upgrade + GLOBAL0_AFDO function to AFDO. Be sure that result + is AFDO and not GUESSED (which is unnecesarily low). */ + if (num.m_quality == AFDO + && (ret.m_quality != GUESSED + && ret.m_quality != GUESSED_LOCAL)) + ret.m_quality = AFDO; + else + ret.m_quality = MAX (ret.m_quality, + num == num.ipa () ? GUESSED : num.m_quality); + } return ret; }