The patch I sent from the airport only worked if you produced the gcda files with
an unpatched compiler. For some reason auto-profile reading is intertwined with
gcov reading, which is not necessary. Here is a cleaner version which also
makes the format a bit more convenient. One can now grep as:
grep "bb.*fdo.*very hot.*cold" *.profile | sort -n -k 5 -r | less
digits_2/30 bb 307 fdo
10273284651 (very hot) afdo 0 (auto FDO) (cold) scaled 0 diff -
10273284651, -100.00%
digits_2/30 bb 201 fdo
2295561442 (very hot) afdo 19074 (auto FDO) (cold) scaled
1341585 diff -
2294219857, -99.94%
digits_2/30 bb 203 fdo
1236123372 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -
1235452580, -99.95%
digits_2/30 bb 200 fdo
1236123372 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -
1235452580, -99.95%
digits_2/30 bb 202 fdo
1059438070 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -
1058767278, -99.94%
new_solver/9 bb 246 fdo
413879041 (very hot) afdo 76594 (guessed) (cold) scaled
5387299 diff -
408491742, -98.70%
new_solver/9 bb 167 fdo
413792205 (very hot) afdo 76594 (guessed) (cold) scaled
5387299 diff -
408404906, -98.70%
new_solver/9 bb 159 fdo
387809230 (very hot) afdo 57182 (guessed) (cold) scaled
4021940 diff -
383787290, -98.96%
new_solver/9 bb 158 fdo
387809230 (very hot) afdo 60510 (guessed) (cold) scaled
4256018 diff -
383553212, -98.90%
new_solver/9 bb 138 fdo
387809230 (very hot) afdo 40917 (guessed) (cold) scaled
2877929 diff -
384931301, -99.26%
new_solver/9 bb 137 fdo
387809230 (very hot) afdo 43298 (guessed) (cold) scaled
3045398 diff -
384763832, -99.21%
This dumps basic blocks that have large counts with normal profile feedback
but to which autofdo gives a small count (so they become cold). These seem to be
indeed mostly basic blocks controlling loops.
gcc/ChangeLog:
* auto-profile.cc (afdo_hot_bb_threshod): New global
variable.
(maybe_hot_afdo_count_p): New function.
(autofdo_source_profile::read): Do not set up dump file;
set afdo_hot_bb_threshod.
(afdo_annotate_cfg): Handle partial training.
(afdo_callsite_hot_enough_for_early_inline):
Use maybe_hot_afdo_count_p.
(auto_profile_offline::execute): Read autofdo file.
* auto-profile.h (maybe_hot_afdo_count_p): Declare.
(afdo_hot_bb_threshold): Declare.
* coverage.cc (read_counts_file): Also set gcov_profile_info.
(coverage_init): Do not read autofdo file.
* opts.cc (enable_fdo_optimizations): Add autofdo parameter;
do not set flag_branch_probabilities and flag_profile_values
with it.
(common_handle_option): Update.
* passes.cc (finish_optimization_passes): Do not end branch
prob here.
(pass_manager::dump_profile_report): Also mark change after
autofdo pass.
* profile.cc: Include auto-profile.h.
(gcov_profile_info): New global variable.
(struct afdo_fdo_record): New structure.
(compute_branch_probabilities): Record afdo profile.
(end_branch_prob): Dump afdo/fdo profile comparison.
* profile.h (gcov_profile_info): Declare.
* tree-profile.cc (tree_profiling): Call end_branch_prob.
(pass_ipa_tree_profile::gate): Also enable with autoFDO.
#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
#define AUTO_PROFILE_VERSION 2
+/* Profile counts determined by AFDO smaller than afdo_hot_bb_threshold are
+ considered cold. */
+gcov_type afdo_hot_bb_threshod = -1;
+
+/* Return true if COUNT is possibly hot, i.e. if its IPA count converted
+ to gcov_type is at least afdo_hot_bb_threshod. The IPA count of COUNT
+ must be initialized (verified by a checking assert).
+ NOTE(review): the variable is spelled afdo_hot_bb_threshod here while
+ auto-profile.h declares afdo_hot_bb_threshold; confirm which is meant. */
+bool
+maybe_hot_afdo_count_p (profile_count count)
+{
+ gcc_checking_assert (count.ipa ().initialized_p ());
+ return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshod;
+}
+
namespace autofdo
{
/* Read in the function/callsite profile, and store it in local
data structure. */
unsigned function_num = gcov_read_unsigned ();
- int profile_pass_num
- = g->get_passes ()->get_pass_auto_profile ()->static_pass_number;
- g->get_dumps ()->dump_start (profile_pass_num, NULL);
for (unsigned i = 0; i < function_num; i++)
{
function_instance::function_instance_stack stack;
"auto-profile contains duplicated function instance %s",
afdo_string_table->get_name (s->name ()));
}
+ int hot_frac = param_hot_bb_count_fraction;
/* Scale up the profile, but leave some bits in case some counts gets
bigger than sum_max eventually. */
if (afdo_profile_info->sum_max)
afdo_count_scale
= MAX (((gcov_type)1 << (profile_count::n_bits / 2))
/ afdo_profile_info->sum_max, 1);
+ afdo_hot_bb_threshod
+ = hot_frac
+ ? afdo_profile_info->sum_max * afdo_count_scale / hot_frac
+ : (gcov_type)profile_count::max_count;
+ set_hot_bb_threshold (afdo_hot_bb_threshod);
if (dump_file)
fprintf (dump_file, "Max count in profile %" PRIu64 "\n"
"Setting scale %" PRIu64 "\n"
(int64_t)afdo_profile_info->sum_max,
(int64_t)afdo_count_scale,
(int64_t)(afdo_profile_info->sum_max * afdo_count_scale),
- (int64_t)(afdo_profile_info->sum_max * afdo_count_scale
- / param_hot_bb_count_fraction));
+ (int64_t)afdo_hot_bb_threshod);
afdo_profile_info->sum_max *= afdo_count_scale;
- g->get_dumps ()->dump_finish (profile_pass_num);
return true;
}
if (dump_file)
fprintf (dump_file, "No afdo profile for %s\n",
cgraph_node::get (current_function_decl)->dump_name ());
+ /* create_gcov only dumps symbols with some samples in them.
+ This means that we get nonempty zero_bbs only if some
+ nonzero counts in profile were not matched with statements. */
+ if (!flag_profile_partial_training)
+ {
+ FOR_ALL_BB_FN (bb, cfun)
+ if (bb->count.quality () == GUESSED_LOCAL)
+ bb->count = bb->count.global0afdo ();
+ update_max_bb_count ();
+ }
return;
}
if (dump_file)
fprintf (dump_file, "Setting global count to afdo0\n");
}
- FOR_ALL_BB_FN (bb, cfun)
- if (bb->count.quality () == GUESSED_LOCAL)
- bb->count = bb->count.global0afdo ();
+ if (!flag_profile_partial_training)
+ {
+ FOR_ALL_BB_FN (bb, cfun)
+ if (bb->count.quality () == GUESSED_LOCAL)
+ bb->count = bb->count.global0afdo ();
+ update_max_bb_count ();
+ }
loop_optimizer_finalize ();
free_dominance_info (CDI_DOMINATORS);
{
bool is_hot;
profile_count pcount = profile_count::from_gcov_type (count).afdo ();
- gcov_summary *saved_profile_info = profile_info;
- /* At early inline stage, profile_info is not set yet. We need to
- temporarily set it to afdo_profile_info to calculate hotness. */
- profile_info = autofdo::afdo_profile_info;
- is_hot = maybe_hot_count_p (NULL, pcount);
+ is_hot = maybe_hot_afdo_count_p (pcount);
if (dump_file)
{
fprintf (dump_file, "Call %s -> %s has %s afdo profile count ",
pcount.dump (dump_file);
fprintf (dump_file, "\n");
}
- profile_info = saved_profile_info;
return is_hot;
}
unsigned int
execute (function *) final override
{
+ read_autofdo_file ();
if (autofdo::afdo_source_profile)
autofdo::afdo_source_profile->offline_external_functions ();
return 0;
do not repeat it later. */
extern void remove_afdo_speculative_target (cgraph_edge *);
+/* Profile counts determined by AFDO smaller than afdo_hot_bb_threshold are
+ considered cold. */
+extern gcov_type afdo_hot_bb_threshold;
+
+/* Return true if COUNT is possibly hot. */
+extern bool maybe_hot_afdo_count_p (profile_count count);
+
#endif /* AUTO_PROFILE_H */
}
else if (tag == GCOV_TAG_OBJECT_SUMMARY)
{
- profile_info = XCNEW (gcov_summary);
+ gcov_profile_info = profile_info = XCNEW (gcov_summary);
profile_info->runs = gcov_read_unsigned ();
profile_info->sum_max = gcov_read_unsigned ();
}
strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
bbg_file_stamp = local_tick;
- if (flag_auto_profile)
- read_autofdo_file ();
- else if (flag_branch_probabilities)
+ if (flag_branch_probabilities)
read_counts_file ();
/* Name of bbg file. */
static void
enable_fdo_optimizations (struct gcc_options *opts,
struct gcc_options *opts_set,
- int value)
+ int value, bool autofdo)
{
- SET_OPTION_IF_UNSET (opts, opts_set, flag_branch_probabilities, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_values, value);
+ if (!autofdo)
+ {
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_branch_probabilities, value);
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_values, value);
+ }
SET_OPTION_IF_UNSET (opts, opts_set, flag_unroll_loops, value);
SET_OPTION_IF_UNSET (opts, opts_set, flag_peel_loops, value);
SET_OPTION_IF_UNSET (opts, opts_set, flag_tracer, value);
/* No break here - do -fprofile-use processing. */
/* FALLTHRU */
case OPT_fprofile_use:
- enable_fdo_optimizations (opts, opts_set, value);
+ enable_fdo_optimizations (opts, opts_set, value, false);
SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_reorder_functions,
value);
/* Indirect call profiling should do all useful transformations
/* No break here - do -fauto-profile processing. */
/* FALLTHRU */
case OPT_fauto_profile:
- enable_fdo_optimizations (opts, opts_set, value);
+ enable_fdo_optimizations (opts, opts_set, value, true);
SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
break;
gcc::dump_manager *dumps = m_ctxt->get_dumps ();
timevar_push (TV_DUMP);
- if (coverage_instrumentation_p () || flag_test_coverage
- || flag_branch_probabilities)
- {
- dumps->dump_start (m_pass_profile_1->static_pass_number, NULL);
- end_branch_prob ();
- dumps->dump_finish (m_pass_profile_1->static_pass_number);
- }
/* Do whatever is necessary to finish printing the graphs. */
for (i = TDI_end; (dfi = dumps->get_dump_file_info (i)) != NULL; ++i)
fprintf (dump_file, "| %12.0f", profile_record[i].time);
/* Time units changes with profile estimate and feedback. */
if (i == m_pass_profile_1->static_pass_number
+ || i == m_pass_ipa_auto_profile_1->static_pass_number
|| i == m_pass_ipa_tree_profile_1->static_pass_number)
fprintf (dump_file, "-------------");
else if (rel_time_change)
#include "file-prefix-map.h"
#include "profile.h"
+#include "auto-profile.h"
struct condcov;
struct condcov *find_conditions (struct function*);
/* Counter summary from the last set of coverage counts read. */
-gcov_summary *profile_info;
+gcov_summary *profile_info, *gcov_profile_info;
/* Collect statistics on the performance of this pass for the entire source
file. */
static int total_num_branches;
static int total_num_conds;
+/* Map between auto-fdo and fdo counts used to compare quality
+ of the profiles. One record describes one function. */
+struct afdo_fdo_record
+{
+ /* Callgraph node of the function the record describes. */
+ cgraph_node *node;
+ struct bb_record
+ {
+ /* Index of the basic block. */
+ int index;
+ /* IPA count of the block recorded before the gcov counts are read
+ (the auto-FDO side of the comparison). */
+ profile_count afdo;
+ /* IPA count of the block recorded after the gcov counts are read;
+ uninitialized until then. */
+ profile_count fdo;
+
+ /* Indices of predecessor and successor blocks in the CFG. */
+ vec <int> preds;
+ vec <int> succs;
+ };
+ /* Per-basic-block records of the function. */
+ vec <bb_record> bbs;
+};
+
+static vec <afdo_fdo_record> afdo_fdo_records;
+
/* Forward declarations. */
static void find_spanning_tree (struct edge_list *);
BB_INFO (EXIT_BLOCK_PTR_FOR_FN (cfun))->succ_count = 2;
BB_INFO (ENTRY_BLOCK_PTR_FOR_FN (cfun))->pred_count = 2;
+ afdo_fdo_record record = {cgraph_node::get (current_function_decl), vNULL};;
+ if (dump_file && flag_auto_profile)
+ {
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ record.bbs.safe_push ({bb->index, bb->count.ipa (),
+ profile_count::uninitialized (), vNULL, vNULL});
+ record.bbs.last ().preds.reserve (EDGE_COUNT (bb->preds));
+ for (auto &e : bb->preds)
+ record.bbs.last ().preds.safe_push (e->src->index);
+ record.bbs.last ().succs.reserve (EDGE_COUNT (bb->succs));
+ for (auto &e : bb->succs)
+ record.bbs.last ().succs.safe_push (e->dest->index);
+ }
+ }
+
num_edges = read_profile_edge_counts (exec_counts);
if (dump_file)
delete edge_gcov_counts;
edge_gcov_counts = NULL;
+ if (dump_file && flag_auto_profile)
+ {
+ int i = 0;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ gcc_checking_assert (record.bbs[i].index == bb->index);
+ record.bbs[i].fdo = bb->count.ipa ();
+ i++;
+ }
+ afdo_fdo_records.safe_push (record);
+ }
+
update_max_bb_count ();
if (dump_file)
}
fprintf (dump_file, "Total number of conditions: %d\n",
total_num_conds);
+ if (afdo_fdo_records.length ())
+ {
+ profile_count fdo_sum = profile_count::zero ();
+ profile_count afdo_sum = profile_count::zero ();
+ for (const auto &r : afdo_fdo_records)
+ for (const auto &b : r.bbs)
+ if (b.fdo.initialized_p () && b.afdo.initialized_p ())
+ {
+ fdo_sum += b.fdo;
+ afdo_sum += b.afdo;
+ }
+ for (auto &r : afdo_fdo_records)
+ {
+ for (auto &b : r.bbs)
+ if (b.fdo.initialized_p () && b.afdo.initialized_p ())
+ {
+ fprintf (dump_file, "%s bb %i fdo %" PRIu64 " (%s) afdo ",
+ r.node->dump_name (), b.index,
+ (int64_t)b.fdo.to_gcov_type (),
+ maybe_hot_count_p
+ (NULL, b.fdo.apply_scale (1, 1000))
+ ? "very hot"
+ : maybe_hot_count_p (NULL, b.fdo)
+ ? "hot" : "cold");
+ b.afdo.dump (dump_file);
+ fprintf (dump_file, " (%s) ",
+ maybe_hot_afdo_count_p
+ (b.afdo.apply_scale (1, 1000))
+ ? "very hot"
+ : maybe_hot_afdo_count_p (b.afdo)
+ ? "hot" : "cold");
+ if (afdo_sum.nonzero_p ())
+ {
+ profile_count scaled
+ = b.afdo.apply_scale (fdo_sum, afdo_sum);
+ fprintf (dump_file, "scaled %" PRIu64,
+ scaled.to_gcov_type ());
+ if (b.fdo.to_gcov_type ())
+ fprintf (dump_file, " diff %" PRId64 ", %+2.2f%%",
+ scaled.to_gcov_type ()
+ - b.fdo.to_gcov_type (),
+ (scaled.to_gcov_type ()
+ - b.fdo.to_gcov_type ()) * 100.0
+ / b.fdo.to_gcov_type ());
+ }
+ fprintf (dump_file, "\n preds");
+ for (int val : b.preds)
+ fprintf (dump_file, " %i", val);
+ b.preds.release ();
+ fprintf (dump_file, "\n succs");
+ for (int val : b.succs)
+ fprintf (dump_file, " %i", val);
+ b.succs.release ();
+ fprintf (dump_file, "\n");
+ }
+ r.bbs.release ();
+ }
+ }
+ afdo_fdo_records.release ();
}
}
/* Counter summary from the last set of coverage counts read by
profile.cc. */
-extern struct gcov_summary *profile_info;
+extern struct gcov_summary *profile_info, *gcov_profile_info;
/* Return true if any cfg coverage/profiling is enabled; -fprofile-arcs
-fcondition-coverage -fpath-coverage. */
handle_missing_profiles ();
del_node_map ();
+ end_branch_prob ();
return 0;
}
bool
pass_ipa_tree_profile::gate (function *)
{
- /* When profile instrumentation, use or test coverage shall be performed.
- But for AutoFDO, this there is no instrumentation, thus this pass is
- disabled. */
- return (!in_lto_p && !flag_auto_profile
+ /* When profile instrumentation, use or test coverage shall be performed. */
+ return (!in_lto_p
&& (flag_branch_probabilities || flag_test_coverage
|| coverage_instrumentation_p ())
&& !seen_error ());