* ipa-fnsummary.c (record_modified_bb_info): Use sreal
frequencies. Fix estimation of aggregate parameters.
* gcc.dg/ipa/inline-1.c: Update template.
From-SVN: r255103
+2017-11-23 Jan Hubicka <hubicka@ucw.cz>
+
+ * ipa-fnsummary.c (record_modified_bb_info): Use sreal
+ frequencies. Fix estimation of aggregate parameters.
+
2017-11-23 Jan Hubicka <hubicka@ucw.cz>
* cgraphclones.c (cgraph_node::create_clone): Fix updating of profile
2017-11-23 Jan Hubicka <hubicka@ucw.cz>
* cgraphclones.c (cgraph_node::create_clone): Fix updating of profile
static profile_count max_count;
static profile_count spec_rem;
static profile_count max_count;
static profile_count spec_rem;
-/* Pre-computed constant 1/100. */
-static sreal percent_rec;
-
/* Return false when inlining edge E would lead to violating
limits on function unit growth or stack usage growth.
/* Return false when inlining edge E would lead to violating
limits on function unit growth or stack usage growth.
: edge->caller);
sreal freq = edge->sreal_frequency ();
: edge->caller);
sreal freq = edge->sreal_frequency ();
uninlined_call_time *= freq;
else
uninlined_call_time = uninlined_call_time >> 11;
uninlined_call_time *= freq;
else
uninlined_call_time = uninlined_call_time >> 11;
sreal caller_time = ipa_fn_summaries->get (caller)->time;
sreal freq = edge->sreal_frequency ();
sreal caller_time = ipa_fn_summaries->get (caller)->time;
sreal freq = edge->sreal_frequency ();
time *= freq;
else
time = time >> 11;
time *= freq;
else
time = time >> 11;
sreal time = compute_uninlined_call_time (e, unspec_time);
sreal inlined_time = compute_inlined_call_time (e, spec_time);
sreal time = compute_uninlined_call_time (e, unspec_time);
sreal inlined_time = compute_inlined_call_time (e, spec_time);
- if (time - inlined_time
- > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
- * percent_rec)
+ if (time - inlined_time * 100
+ > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
return true;
return false;
}
return true;
return false;
}
{
char const *reason = NULL;
bool want_inline = true;
{
char const *reason = NULL;
bool want_inline = true;
- int caller_freq = CGRAPH_FREQ_BASE;
int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
if (DECL_DECLARED_INLINE_P (edge->caller->decl))
int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
if (DECL_DECLARED_INLINE_P (edge->caller->decl))
reason = "recursive call is cold";
want_inline = false;
}
reason = "recursive call is cold";
want_inline = false;
}
- else if (!outer_node->count.ipa ().nonzero_p ())
- {
- reason = "not executed in profile";
- want_inline = false;
- }
else if (depth > max_depth)
{
reason = "--param max-inline-recursive-depth exceeded.";
want_inline = false;
}
else if (depth > max_depth)
{
reason = "--param max-inline-recursive-depth exceeded.";
want_inline = false;
}
-
- if (outer_node->global.inlined_to)
- caller_freq = outer_node->callers->frequency ();
-
- if (!caller_freq)
+ else if (outer_node->global.inlined_to
+ && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
- reason = "function is inlined and unlikely";
+ reason = "caller frequency is 0";
want_inline = false;
}
if (!want_inline)
;
want_inline = false;
}
if (!want_inline)
;
- /* Inlining of self recursive function into copy of itself within other function
- is transformation similar to loop peeling.
+ /* Inlining of self recursive function into copy of itself within other
+ function is transformation similar to loop peeling.
Peeling is profitable if we can inline enough copies to make probability
of actual call to the self recursive function very small. Be sure that
the probability of recursion is small.
We ensure that the frequency of recursing is at most 1 - (1/max_depth).
Peeling is profitable if we can inline enough copies to make probability
of actual call to the self recursive function very small. Be sure that
the probability of recursion is small.
We ensure that the frequency of recursing is at most 1 - (1/max_depth).
- This way the expected number of recision is at most max_depth. */
+ This way the expected number of recursion is at most max_depth. */
- int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
- / max_depth);
+ sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
int i;
for (i = 1; i < depth; i++)
int i;
for (i = 1; i < depth; i++)
- max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
- if (max_count.nonzero_p () && edge->count.ipa ().nonzero_p ()
- && (edge->count.ipa ().to_gcov_type () * CGRAPH_FREQ_BASE
- / outer_node->count.ipa ().to_gcov_type ()
- >= max_prob))
- {
- reason = "profile of recursive call is too large";
- want_inline = false;
- }
- if (!max_count.nonzero_p ()
- && (edge->frequency () * CGRAPH_FREQ_BASE / caller_freq
- >= max_prob))
+ max_prob = max_prob * max_prob;
+ if (edge->sreal_frequency () >= max_prob * caller_freq)
{
reason = "frequency of recursive call is too large";
want_inline = false;
}
}
{
reason = "frequency of recursive call is too large";
want_inline = false;
}
}
- /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
- depth is large. We reduce function call overhead and increase chances that
- things fit in hardware return predictor.
+ /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
+ recursion depth is large. We reduce function call overhead and increase
+ chances that things fit in hardware return predictor.
Recursive inlining might however increase cost of stack frame setup
actually slowing down functions whose recursion tree is wide rather than
Recursive inlining might however increase cost of stack frame setup
actually slowing down functions whose recursion tree is wide rather than
is tricky. For now we disable recursive inlining when probability of self
recursion is low.
is tricky. For now we disable recursive inlining when probability of self
recursion is low.
- Recursive inlining of self recursive call within loop also results in large loop
- depths that generally optimize badly. We may want to throttle down inlining
- in those cases. In particular this seems to happen in one of libstdc++ rb tree
- methods. */
+ Recursive inlining of self recursive call within loop also results in
+ large loop depths that generally optimize badly. We may want to throttle
+ down inlining in those cases. In particular this seems to happen in one
+ of libstdc++ rb tree methods. */
- if (max_count.nonzero_p () && edge->count.ipa ().initialized_p ()
- && (edge->count.ipa ().to_gcov_type () * 100
- / outer_node->count.ipa ().to_gcov_type ()
- <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
- {
- reason = "profile of recursive call is too small";
- want_inline = false;
- }
- else if ((!max_count.nonzero_p ()
- || !edge->count.ipa ().initialized_p ())
- && (edge->frequency () * 100 / caller_freq
- <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
+ if (edge->sreal_frequency () * 100
+ <= caller_freq
+ * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
{
reason = "frequency of recursive call is too small";
want_inline = false;
{
reason = "frequency of recursive call is too small";
want_inline = false;
numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- inlined_time);
numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- inlined_time);
numerator = ((sreal) 1 >> 8);
if (caller->count.ipa ().nonzero_p ())
numerator *= caller->count.ipa ().to_gcov_type ();
numerator = ((sreal) 1 >> 8);
if (caller->count.ipa ().nonzero_p ())
numerator *= caller->count.ipa ().to_gcov_type ();
&& callee_info->single_caller
&& !edge->caller->global.inlined_to
/* ... and edges executed only conditionally ... */
&& callee_info->single_caller
&& !edge->caller->global.inlined_to
/* ... and edges executed only conditionally ... */
- && edge->frequency () < CGRAPH_FREQ_BASE
+ && edge->sreal_frequency () < 1
/* ... consider case where callee is not inline but caller is ... */
&& ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
&& DECL_DECLARED_INLINE_P (caller->decl))
/* ... or when early optimizers decided to split and edge
frequency still indicates splitting is a win ... */
|| (callee->split_part && !caller->split_part
/* ... consider case where callee is not inline but caller is ... */
&& ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
&& DECL_DECLARED_INLINE_P (caller->decl))
/* ... or when early optimizers decided to split and edge
frequency still indicates splitting is a win ... */
|| (callee->split_part && !caller->split_part
- && edge->frequency ()
- < CGRAPH_FREQ_BASE
- * PARAM_VALUE
- (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
+ && edge->sreal_frequency () * 100
+ < PARAM_VALUE
+ (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
/* ... and do not overwrite user specified hints. */
&& (!DECL_DECLARED_INLINE_P (edge->callee->decl)
|| DECL_DECLARED_INLINE_P (caller->decl)))))
/* ... and do not overwrite user specified hints. */
&& (!DECL_DECLARED_INLINE_P (edge->callee->decl)
|| DECL_DECLARED_INLINE_P (caller->decl)))))
overall_growth += 256 * 256 - 256;
denominator *= overall_growth;
}
overall_growth += 256 * 256 - 256;
denominator *= overall_growth;
}
- denominator *= inlined_time;
+ /*denominator *= inlined_time;*/
badness = - numerator / denominator;
badness = - numerator / denominator;
caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
compute_uninlined_call_time (edge,
unspec_edge_time).to_double (),
caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
compute_uninlined_call_time (edge,
unspec_edge_time).to_double (),
- compute_inlined_call_time (edge, edge_time).to_double (),
+ inlined_time.to_double (),
estimate_growth (callee),
callee_info->growth, overall_growth);
}
estimate_growth (callee),
callee_info->growth, overall_growth);
}
if (e->callee == node
|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
&& avail > AVAIL_INTERPOSABLE))
if (e->callee == node
|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
&& avail > AVAIL_INTERPOSABLE))
- {
- /* When profile feedback is available, prioritize by expected number
- of calls. */
- heap->insert (!(max_count > 0) || !e->count.ipa ().initialized_p () ? -e->frequency ()
- : -(e->count.ipa ().to_gcov_type ()
- / ((max_count.to_gcov_type () + (1<<24) - 1)
- / (1<<24))),
- e);
- }
+ heap->insert (-e->sreal_frequency (), e);
for (e = where->callees; e; e = e->next_callee)
if (!e->inline_failed)
lookup_recursive_calls (node, e->callee, heap);
for (e = where->callees; e; e = e->next_callee)
if (!e->inline_failed)
lookup_recursive_calls (node, e->callee, heap);
update_caller_keys (&edge_heap, where, updated_nodes, NULL);
/* Offline copy count has possibly changed, recompute if profile is
available. */
update_caller_keys (&edge_heap, where, updated_nodes, NULL);
/* Offline copy count has possibly changed, recompute if profile is
available. */
- if (max_count.nonzero_p ())
- {
- struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
- if (n != edge->callee && n->analyzed)
- update_callee_keys (&edge_heap, n, updated_nodes);
- }
+ struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
+ if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
+ update_callee_keys (&edge_heap, n, updated_nodes);
bitmap_clear (updated_nodes);
if (dump_file)
bitmap_clear (updated_nodes);
if (dump_file)
int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
- int64_t reason[CIF_N_REASONS][3];
+ int64_t reason[CIF_N_REASONS][2];
+ sreal reason_freq[CIF_N_REASONS];
int i;
struct cgraph_node *node;
memset (reason, 0, sizeof (reason));
int i;
struct cgraph_node *node;
memset (reason, 0, sizeof (reason));
+ for (i=0; i < CIF_N_REASONS; i++)
+ reason_freq[i] = 0;
FOR_EACH_DEFINED_FUNCTION (node)
{
struct cgraph_edge *e;
FOR_EACH_DEFINED_FUNCTION (node)
{
struct cgraph_edge *e;
{
if (e->count.ipa ().initialized_p ())
reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
{
if (e->count.ipa ().initialized_p ())
reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
- reason[(int) e->inline_failed][1] += e->frequency ();
- reason[(int) e->inline_failed][2] ++;
+ reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
+ reason[(int) e->inline_failed][1] ++;
if (DECL_VIRTUAL_P (e->callee->decl)
&& e->count.ipa ().initialized_p ())
{
if (DECL_VIRTUAL_P (e->callee->decl)
&& e->count.ipa ().initialized_p ())
{
dump_overall_stats ();
fprintf (dump_file, "\nWhy inlining failed?\n");
for (i = 0; i < CIF_N_REASONS; i++)
dump_overall_stats ();
fprintf (dump_file, "\nWhy inlining failed?\n");
for (i = 0; i < CIF_N_REASONS; i++)
- if (reason[i][2])
- fprintf (dump_file, "%-50s: %8i calls, %8i freq, %" PRId64" count\n",
+ if (reason[i][1])
+ fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
- (int) reason[i][2], (int) reason[i][1], reason[i][0]);
+ (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
}
/* Decide on the inlining. We do so in the topological order to avoid
}
/* Decide on the inlining. We do so in the topological order to avoid
int cold;
bool remove_functions = false;
int cold;
bool remove_functions = false;
- percent_rec = (sreal) 1 / (sreal) 100;
-
order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
if (dump_file)
order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
if (dump_file)
+2017-11-23 Jan Hubicka <hubicka@ucw.cz>
+
+ * gcc.dg/ipa/inline-1.c: Update template.
+
2017-11-23 Jakub Jelinek <jakub@redhat.com>
PR middle-end/82253
2017-11-23 Jakub Jelinek <jakub@redhat.com>
PR middle-end/82253
/* { dg-final { scan-ipa-dump-not "op0 change" "inline" } } */
/* { dg-final { scan-ipa-dump "op1 is compile time invariant" "inline" } } */
/* op2 is invariant within loop (we make assumption that function call does not afect it.). */
/* { dg-final { scan-ipa-dump-not "op0 change" "inline" } } */
/* { dg-final { scan-ipa-dump "op1 is compile time invariant" "inline" } } */
/* op2 is invariant within loop (we make assumption that function call does not afect it.). */
-/* { dg-final { scan-ipa-dump "op2 change 10.000000. of time" "inline" } } */
+/* { dg-final { scan-ipa-dump "op2 change 9.990000. of time" "inline" } } */
/* op3 is invariant within loop (we make assumption that function call does not afect it.). */
/* op3 is invariant within loop (we make assumption that function call does not afect it.). */
-/* { dg-final { scan-ipa-dump "op3 change 10.000000. of time" "inline" } } */
+/* { dg-final { scan-ipa-dump "op3 change 9.990000. of time" "inline" } } */
/* op4 is invariant within loop. */
/* op4 is invariant within loop. */
-/* { dg-final { scan-ipa-dump "op4 change 10.000000. of time" "inline" } } */
+/* { dg-final { scan-ipa-dump "op4 change 9.990000. of time" "inline" } } */
/* op5 is invariant within loop. */
/* op5 is invariant within loop. */
-/* { dg-final { scan-ipa-dump "op5 change 10.000000. of time" "inline" } } */
+/* { dg-final { scan-ipa-dump "op5 change 9.990000. of time" "inline" } } */
/* op6 is compile time invariant. */
/* { dg-final { scan-ipa-dump "op6 is compile time invariant" "inline" } } */
/* op7 change. */
/* op6 is compile time invariant. */
/* { dg-final { scan-ipa-dump "op6 is compile time invariant" "inline" } } */
/* op7 change. */