+2008-08-18 Paul Yuan <yingbo.com@gmail.com>
+ Vinodha Ramasamy <vinodha@google.com>
+
+ * cgraph.c (cgraph_edge): Handle inconsistent counts when setting
+ count_scale.
+ * value-prof.c (check_counter): Fix the counter if
+ flag_profile_correction is true.
+ (tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform,
+ tree_mod_subtract_transform):
+ Follow check_counter parameter change.
+ * common.opt (fprofile-correction): New option.
+ * mcf.c: New file.
+ * profile.c (edge_info, EDGE_INFO): Moved to new file profile.h.
+ (sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts,
+ is_inconsistent, set_bb_counts, read_profile_edge_counts): New
+ functions.
+ (compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if
+ flag_profile_correction is set.
+
2008-08-18 Richard Sandiford <rdsandiford@googlemail.com>
* rtlanal.c (subreg_offset_representable_p): Check HARD_REGNO_MODE_OK.
loop-unroll.o \
loop-unswitch.o \
lower-subreg.o \
+ mcf.o \
mode-switching.o \
modulo-sched.o \
omega.o \
profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) $(FUNCTION_H) \
$(TOPLEV_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h cfghooks.h \
- $(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h
+ $(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h profile.h
+mcf.o : mcf.c profile.h $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h \
+ $(BASIC_BLOCK_H) output.h langhooks.h $(GCOV_IO_H) $(TREE_H)
tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
$(FUNCTION_H) $(TOPLEV_H) $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
$(srcdir)/emit-rtl.c $(srcdir)/except.c $(srcdir)/explow.c $(srcdir)/expr.c \
$(srcdir)/function.c $(srcdir)/except.h \
$(srcdir)/gcse.c $(srcdir)/integrate.c $(srcdir)/lists.c $(srcdir)/optabs.c \
- $(srcdir)/profile.c $(srcdir)/regclass.c \
+ $(srcdir)/profile.c $(srcdir)/regclass.c $(srcdir)/mcf.c \
$(srcdir)/reg-stack.c $(srcdir)/cfglayout.c $(srcdir)/cfglayout.h \
$(srcdir)/sdbout.c $(srcdir)/stor-layout.c \
$(srcdir)/stringpool.c $(srcdir)/tree.c $(srcdir)/varasm.c \
if (node->call_site_hash)
return (struct cgraph_edge *)
htab_find_with_hash (node->call_site_hash, call_stmt,
- htab_hash_pointer (call_stmt));
+ htab_hash_pointer (call_stmt));
/* This loop may turn out to be performance problem. In such case adding
hashtables into call nodes with very many edges is probably best
new_node->master_clone = n->master_clone;
new_node->count = count;
if (n->count)
- count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
+ {
+ if (new_node->count > n->count)
+ count_scale = REG_BR_PROB_BASE;
+ else
+ count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
+ }
else
count_scale = 0;
if (update_original)
Set the top-level directory for storing the profile data.
The default is 'pwd'.
+fprofile-correction
+Common Report Var(flag_profile_correction)
+Enable correction of flow inconsistent profile data input
+
fprofile-generate
Common
Enable common options for generating profile info for profile feedback directed optimizations
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
-fpeel-loops -fpredictive-commoning -fprefetch-loop-arrays @gol
--fprofile-dir=@var{path} -fprofile-generate -fprofile-generate=@var{path} @gol
+-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
+-fprofile-generate=@var{path} @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
-freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
-freorder-blocks-and-partition -freorder-functions @gol
Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
+@item -fprofile-correction
+@opindex fprofile-correction
+Profiles collected using an instrumented binary for multi-threaded programs may
+be inconsistent due to missed counter updates. When this option is specified,
+GCC will use heuristics to correct or smooth out such inconsistencies. By
+default, GCC will emit an error message when an inconsistent profile is detected.
+
@item -fprofile-dir=@var{path}
@opindex fprofile-dir
#include "cfgloop.h"
#include "tree-pass.h"
+#include "profile.h"
+
/* Hooks for profiling. */
static struct profile_hooks* profile_hooks;
-/* Additional information about the edges we need. */
-struct edge_info {
- unsigned int count_valid : 1;
-
- /* Is on the spanning tree. */
- unsigned int on_tree : 1;
-
- /* Pretend this edge does not exist (it is abnormal and we've
- inserted a fake to compensate). */
- unsigned int ignore : 1;
-};
-
struct bb_info {
unsigned int count_valid : 1;
gcov_type pred_count;
};
-#define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
#define BB_INFO(b) ((struct bb_info *) (b)->aux)
static basic_block find_group (basic_block);
static void union_groups (basic_block, basic_block);
-\f
/* Add edge instrumentation code to the entire insn chain.
F is the first insn of the chain.
return counts;
}
-\f
-/* Compute the branch probabilities for the various branches.
- Annotate them accordingly. */
+
+static bool
+is_edge_inconsistent (VEC(edge,gc) *edges)
+{
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, edges)
+ {
+ if (!EDGE_INFO (e)->ignore)
+ {
+ if (e->count < 0)
+ return true;
+ }
+ }
+ return false;
+}
static void
-compute_branch_probabilities (void)
+correct_negative_edge_counts (void)
{
basic_block bb;
- int i;
- int num_edges = 0;
- int changes;
- int passes;
- int hist_br_prob[20];
- int num_never_executed;
- int num_branches;
- gcov_type *exec_counts = get_exec_counts ();
- int exec_counts_pos = 0;
+ edge e;
+ edge_iterator ei;
- /* Very simple sanity checks so we catch bugs in our profiling code. */
- if (profile_info)
+ FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
{
- if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
- {
- error ("corrupted profile info: run_max * runs < sum_max");
- exec_counts = NULL;
- }
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (e->count < 0)
+ e->count = 0;
+ }
+ }
+}
- if (profile_info->sum_all < profile_info->sum_max)
- {
- error ("corrupted profile info: sum_all is smaller than sum_max");
- exec_counts = NULL;
- }
+/* Check consistency.
+ Return true if inconsistency is found. */
+static bool
+is_inconsistent (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ if (is_edge_inconsistent (bb->preds))
+ return true;
+ if (is_edge_inconsistent (bb->succs))
+ return true;
+ if ( bb->count != sum_edge_counts (bb->preds)
+ || (bb->count != sum_edge_counts (bb->succs) &&
+ !(find_edge (bb, EXIT_BLOCK_PTR) != NULL &&
+ block_ends_with_call_p (bb))))
+ return true;
}
- /* Attach extra info block to each bb. */
+ return false;
+}
- alloc_aux_for_blocks (sizeof (struct bb_info));
+/* Set each basic block count to the sum of its outgoing edge counts */
+static void
+set_bb_counts (void)
+{
+ basic_block bb;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
{
- edge e;
- edge_iterator ei;
-
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (!EDGE_INFO (e)->ignore)
- BB_INFO (bb)->succ_count++;
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (!EDGE_INFO (e)->ignore)
- BB_INFO (bb)->pred_count++;
+ bb->count = sum_edge_counts (bb->succs);
+ gcc_assert (bb->count >= 0);
}
+}
- /* Avoid predicting entry on exit nodes. */
- BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
- BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
-
+/* Reads profile data and returns total number of edge counts read */
+static int
+read_profile_edge_counts (gcov_type *exec_counts)
+{
+ basic_block bb;
+ int num_edges = 0;
+ int exec_counts_pos = 0;
/* For each edge not on the spanning tree, set its execution count from
the .da file. */
-
/* The first count in the .da file is the number of times that the function
was entered. This is the exec_count for block zero. */
}
}
+ return num_edges;
+}
+
+/* Compute the branch probabilities for the various branches.
+ Annotate them accordingly. */
+
+static void
+compute_branch_probabilities (void)
+{
+ basic_block bb;
+ int i;
+ int num_edges = 0;
+ int changes;
+ int passes;
+ int hist_br_prob[20];
+ int num_never_executed;
+ int num_branches;
+ gcov_type *exec_counts = get_exec_counts ();
+ int inconsistent = 0;
+
+ /* Very simple sanity checks so we catch bugs in our profiling code. */
+ if (profile_info)
+ {
+ if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
+ {
+ error ("corrupted profile info: run_max * runs < sum_max");
+ exec_counts = NULL;
+ }
+
+ if (profile_info->sum_all < profile_info->sum_max)
+ {
+ error ("corrupted profile info: sum_all is smaller than sum_max");
+ exec_counts = NULL;
+ }
+ }
+
+ /* Attach extra info block to each bb. */
+ alloc_aux_for_blocks (sizeof (struct bb_info));
+ FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!EDGE_INFO (e)->ignore)
+ BB_INFO (bb)->succ_count++;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (!EDGE_INFO (e)->ignore)
+ BB_INFO (bb)->pred_count++;
+ }
+
+ /* Avoid predicting entry on exit nodes. */
+ BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
+ BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
+
+ num_edges = read_profile_edge_counts (exec_counts);
+
if (dump_file)
fprintf (dump_file, "\n%d edge counts read\n", num_edges);
gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count);
}
+ /* Check for inconsistent basic block counts */
+ inconsistent = is_inconsistent ();
+
+ if (inconsistent)
+ {
+ if (flag_profile_correction)
+ {
+ /* Inconsistency detected. Make it flow-consistent. */
+ static int informed = 0;
+ if (informed == 0)
+ {
+ informed = 1;
+ inform ("correcting inconsistent profile data");
+ }
+ correct_negative_edge_counts ();
+ /* Set bb counts to the sum of the outgoing edge counts */
+ set_bb_counts ();
+ if (dump_file)
+ fprintf (dump_file, "\nCalling mcf_smooth_cfg\n");
+ mcf_smooth_cfg ();
+ }
+ else
+ error ("corrupted profile info: profile data is not flow-consistent");
+ }
+
/* For every edge, calculate its branch probability and add a reg_note
to the branch insn to indicate this. */
somehow. */
static bool
-check_counter (gimple stmt, const char *name, gcov_type all, gcov_type bb_count)
+check_counter (gimple stmt, const char * name,
+ gcov_type *count, gcov_type *all, gcov_type bb_count)
{
- if (all != bb_count)
+ if (*all != bb_count || *count > *all)
{
location_t locus;
locus = (stmt != NULL)
- ? gimple_location (stmt)
- : DECL_SOURCE_LOCATION (current_function_decl);
- error ("%HCorrupted value profile: %s profiler overall count (%d) "
- "does not match BB count (%d)", &locus, name, (int)all,
- (int)bb_count);
- return true;
+ ? gimple_location (stmt)
+ : DECL_SOURCE_LOCATION (current_function_decl);
+ if (flag_profile_correction)
+ {
+ inform ("%HCorrecting inconsistent value profile: "
+ "%s profiler overall count (%d) does not match BB count "
+ "(%d)", &locus, name, (int)all, (int)bb_count);
+ *all = bb_count;
+ if (*count > *all)
+ *count = *all;
+ return false;
+ }
+ else
+ {
+ error ("%HCorrupted value profile: %s profiler overall count (%d) "
+ "does not match BB count (%d)", &locus, name, (int)all,
+ (int)bb_count);
+ return true;
+ }
}
return false;
|| !maybe_hot_bb_p (gimple_bb (stmt)))
return false;
- if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
+ if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
return false;
/* Compute probability of taking the optimal path. */
/* Compute probability of taking the optimal path. */
all = count + wrong_values;
- if (check_counter (stmt, "pow2", all, gimple_bb (stmt)->count))
+ if (check_counter (stmt, "pow2", &count, &all, gimple_bb (stmt)->count))
return false;
if (all > 0)
count2 = histogram->hvalue.counters[1];
/* Compute probability of taking the optimal path. */
- if (check_counter (stmt, "interval", all, gimple_bb (stmt)->count))
+ if (check_counter (stmt, "interval", &count1, &all, gimple_bb (stmt)->count))
{
gimple_remove_histogram_value (cfun, stmt, histogram);
return false;
}
+ if (flag_profile_correction && count1 + count2 > all)
+ all = count1 + count2;
+
+ gcc_assert (count1 + count2 <= all);
+
/* We require that we use just subtractions in at least 50% of all
evaluations. */
count = 0;
gimple_ic_transform (gimple stmt)
{
histogram_value histogram;
- gcov_type val, count, all;
+ gcov_type val, count, all, bb_all;
gcov_type prob;
tree callee;
gimple modify;
if (4 * count <= 3 * all)
return false;
+ bb_all = gimple_bb (stmt)->count;
+ /* The order of CHECK_COUNTER calls is important -
+ since check_counter can correct the third parameter
+ and we want to make count <= all <= bb_all. */
+ if ( check_counter (stmt, "ic", &all, &bb_all, bb_all)
+ || check_counter (stmt, "ic", &count, &all, all))
+ return false;
+
if (all > 0)
prob = (count * REG_BR_PROB_BASE + all / 2) / all;
else
at least 80% of time. */
if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
return false;
- if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
+ if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
return false;
if (all > 0)
prob = (count * REG_BR_PROB_BASE + all / 2) / all;