From: Jan Hubicka Date: Wed, 15 Oct 2025 07:49:21 +0000 (+0200) Subject: Cleanup max of profile_count X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a93f80feeef744649dc426a59a5860224d13e68d;p=thirdparty%2Fgcc.git Cleanup max of profile_count profile_count::max is not implemented same way as other arithmetics on profile counts which generally require counts to be compatible and returns minimum of qualities of input counts. Reason is that originally it was used to compute statistics of whole callgraph profile so inliner weights can be scaled to reasonable integers interprocedurally. It also combines qulities weird way so the same counter could be used to determine what quality of profile is available. That code had roundoff error issues and was replaced by sreals. Now max is mostly used to determine cfg->max_count which is used to scale counts to reasonable integers intraprocedurally and is still being used i.e. by IRA. There are also few places where max is used for normal arithmetics when updating profile. For computing max_count we need max to still be a bit special so max (uninitialized, initialized) returns initialized rather then uninitialized. Partial profiles are later handled specially. This patch renames max to max_prefer_initialized to make it clear and updates implementation to require compatible profiles. I checked this behaviour is good for other places using it as well. I also turned function to static, since a = a->max (b) looks odd. gcc/ChangeLog: * auto-profile.cc (scale_bb_profile): Use profile_count::max_prefer_initialized. (afdo_adjust_guessed_profile): Likewise. * bb-reorder.cc (edge_order): Do not use max. * cfghooks.cc (merge_blocks): Likewise. * ipa-fnsummary.cc (param_change_prob): Likewise. * ipa-inline-transform.cc (inline_transform): Likewise. * predict.cc (update_max_bb_count): Likewise. (estimate_bb_frequencies): Likewise. (rebuild_frequencies): Likewise. * tree-ssa-loop-unswitch.cc (struct unswitch_predicate): Likewise. * profile-count.h (profile_count::max): Rename to (profile_count::max_prefer_initialized): this; update handling of qualities. --- diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 6971204ddf5..cf7a2191336 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -3600,11 +3600,12 @@ scale_bb_profile () { profile_count cnt = bb->count; bbs.safe_push (bb); - max_count = max_count.max (bb->count); + max_count = profile_count::max_prefer_initialized (max_count, cnt); if (afdo_set_bb_count (bb, zero_bbs)) { std::swap (cnt, bb->count); - max_count_in_fn = max_count_in_fn.max (cnt); + max_count_in_fn + = profile_count::max_prefer_initialized (max_count_in_fn, cnt); add_scale (&scales, cnt, bb->count); } } @@ -3646,7 +3647,8 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) if (is_bb_annotated (seed_bb, *annotated_bb)) { component[seed_bb->index] = 1; - max_count_in_fn = max_count_in_fn.max (seed_bb->count); + max_count_in_fn + = profile_count::max_prefer_initialized (max_count_in_fn, seed_bb->count); } else component[seed_bb->index] = 0; @@ -3669,7 +3671,7 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) basic_block b = stack.pop (); bbs.quick_push (b); - max_count = max_count.max (b->count); + max_count = profile_count::max_prefer_initialized (max_count, b->count); for (edge e: b->preds) if (!component[e->src->index]) diff --git a/gcc/bb-reorder.cc b/gcc/bb-reorder.cc index 641b4928ffb..e4efdee0b16 100644 --- a/gcc/bb-reorder.cc +++ b/gcc/bb-reorder.cc @@ -2389,8 +2389,10 @@ edge_order (const void *ve1, const void *ve2) /* Since profile_count::operator< does not establish a strict weak order in presence of uninitialized counts, use 'max': this makes them appear as if having execution frequency less than any initialized count. */ - profile_count m = c1.max (c2); - return (m == c2) - (m == c1); + gcov_type gc1 = c1.initialized_p () ? c1.to_gcov_type () : 0; + gcov_type gc2 = c2.initialized_p () ? c2.to_gcov_type () : 0; + gcov_type m = MAX (gc1, gc2); + return (m == gc1) - (m == gc2); } /* Reorder basic blocks using the "simple" algorithm. This tries to diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc index 25bc5d4b273..01169e22fbb 100644 --- a/gcc/cfghooks.cc +++ b/gcc/cfghooks.cc @@ -824,7 +824,7 @@ merge_blocks (basic_block a, basic_block b) else if (a->count.quality () < b->count.quality ()) a->count = b->count; else if (a->count.quality () == b->count.quality ()) - a->count = a->count.max (b->count); + a->count = profile_count::max_prefer_initialized (a->count, b->count); cfg_hooks->merge_blocks (a, b); diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index dd41de98ff1..28f79aad29e 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -2356,7 +2356,8 @@ param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i) /* Lookup the most frequent update of the value and believe that it dominates all the other; precise analysis here is difficult. */ EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count); + max = profile_count::max_prefer_initialized + (max, BASIC_BLOCK_FOR_FN (cfun, index)->count); if (dump_file) { fprintf (dump_file, " Set with count "); diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index a2048549665..99969aae189 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -833,7 +833,8 @@ inline_transform (struct cgraph_node *node) FOR_ALL_BB_FN (bb, cfun) { bb->count = bb->count.apply_scale (num, den); - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max = profile_count::max_prefer_initialized + (cfun->cfg->count_max, bb->count); } ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count; } diff --git a/gcc/predict.cc b/gcc/predict.cc index 895c5f959d0..d937cc699b0 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -3849,7 +3849,7 @@ update_max_bb_count (void) basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - true_count_max = true_count_max.max (bb->count); + true_count_max = profile_count::max_prefer_initialized (true_count_max, bb->count); cfun->cfg->count_max = true_count_max; @@ -4162,7 +4162,9 @@ estimate_bb_frequencies () executed, then preserve this info. */ if (!(bb->count == profile_count::zero ())) bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count); - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max + = profile_count::max_prefer_initialized (cfun->cfg->count_max, + bb->count); } free_aux_for_blocks (); @@ -4473,7 +4475,9 @@ rebuild_frequencies (void) cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max + = profile_count::max_prefer_initialized (cfun->cfg->count_max, + bb->count); if (bb->count.nonzero_p () && bb->count.quality () >= AFDO) feedback_found = true; /* Uninitialized count may be result of inlining or an omision in an diff --git a/gcc/profile-count.h b/gcc/profile-count.h index 89746c6749f..85e601e5001 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -1109,30 +1109,23 @@ public: /* Make counter forcibly nonzero. */ profile_count force_nonzero () const; - profile_count max (profile_count other) const - { - profile_count val = *this; - /* Always prefer nonzero IPA counts over local counts. */ - if (ipa ().nonzero_p () || other.ipa ().nonzero_p ()) - { - val = ipa (); - other = other.ipa (); - } - if (!initialized_p ()) - return other; - if (!other.initialized_p ()) - return *this; - if (*this == zero ()) - return other; - if (other == zero ()) - return *this; - gcc_checking_assert (compatible_p (other)); - if (val.m_val < other.m_val || (m_val == other.m_val - && val.m_quality < other.m_quality)) - return other; - return *this; - } + /* Return maximum of A and B. If one of values is uninitialized return the + other. */ + + static profile_count + max_prefer_initialized (const profile_count a, const profile_count b) + { + if (!a.initialized_p ()) + return b; + if (!b.initialized_p ()) + return a; + profile_count ret; + gcc_checking_assert (a.compatible_p (b)); + ret.m_val = MAX (a.m_val, b.m_val); + ret.m_quality = MIN (a.m_quality, b.m_quality); + return ret; + } /* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter accordingly. */ diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc index c5ca4ff8945..dc0cb245754 100644 --- a/gcc/tree-ssa-loop-unswitch.cc +++ b/gcc/tree-ssa-loop-unswitch.cc @@ -136,7 +136,8 @@ struct unswitch_predicate tree rhs = gimple_cond_rhs (stmt); enum tree_code code = gimple_cond_code (stmt); condition = build2 (code, boolean_type_node, lhs, rhs); - count = EDGE_SUCC (bb, 0)->count ().max (EDGE_SUCC (bb, 1)->count ()); + count = profile_count::max_prefer_initialized (EDGE_SUCC (bb, 0)->count (), + EDGE_SUCC (bb, 1)->count ()); if (irange::supports_p (TREE_TYPE (lhs))) { auto range_op = range_op_handler (code);