/* Interprocedural constant propagation
- Copyright (C) 2005-2019 Free Software Foundation, Inc.
+ Copyright (C) 2005-2020 Free Software Foundation, Inc.
Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
<mjambor@suse.cz>
inline bool set_contains_variable ();
bool add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val = NULL,
- int src_idx = 0, HOST_WIDE_INT offset = -1);
+ int src_idx = 0, HOST_WIDE_INT offset = -1,
+ ipcp_value<valtype> **val_p = NULL,
+ bool unlimited = false);
void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Original overall size of the program. */
-static long overall_size, max_new_size;
+static long overall_size, orig_overall_size;
/* Node name to unique clone suffix number map. */
static hash_map<const char *, unsigned> *clone_num_suffixes;
class ipa_node_params *info;
info = IPA_NODE_REF (node);
- /* Skip constprop clones since we don't make lattices for them. */
- if (info->ipcp_orig_node)
+ /* Skip unoptimized functions and constprop clones since we don't make
+ lattices for them. */
+ if (!info || info->ipcp_orig_node)
continue;
fprintf (f, " Node: %s:\n", node->dump_name ());
count = ipa_get_param_count (info);
if (dump_file)
fprintf (dump_file, "Not considering %s for cloning; "
"-fipa-cp-clone disabled.\n",
- node->name ());
+ node->dump_name ());
return false;
}
if (dump_file)
fprintf (dump_file, "Not considering %s for cloning; "
"optimizing it for size.\n",
- node->name ());
+ node->dump_name ());
return false;
}
{
if (dump_file)
fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
- node->name ());
+ node->dump_name ());
return true;
}
if (dump_file)
fprintf (dump_file, "Considering %s for cloning; "
"usually called directly.\n",
- node->name ());
+ node->dump_name ());
return true;
}
}
{
if (dump_file)
fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
- node->name ());
+ node->dump_name ());
return false;
}
if (dump_file)
fprintf (dump_file, "Considering %s for cloning.\n",
- node->name ());
+ node->dump_name ());
return true;
}
gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
if (TREE_CODE (input) == ADDR_EXPR)
{
- tree t = TREE_OPERAND (input, 0);
- t = build_ref_for_offset (EXPR_LOCATION (t), t,
- ipa_get_jf_ancestor_offset (jfunc), false,
- ptr_type_node, NULL, false);
- return build_fold_addr_expr (t);
+ gcc_checking_assert (is_gimple_ip_invariant_address (input));
+ poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
+ if (known_eq (off, 0))
+ return input;
+ poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
+ return build1 (ADDR_EXPR, TREE_TYPE (input),
+ fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
+ build_int_cst (ptr_type_node, byte_offset)));
}
else
return NULL_TREE;
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
meaning. OFFSET -1 means the source is scalar and not a part of an
- aggregate. */
+ aggregate. If non-NULL, VAL_P records the address of the existing or newly
+ added ipcp_value. If UNLIMITED is true, the number of values is not bounded
+ by the limit given by PARAM_IPA_CP_VALUE_LIST_SIZE. */
template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val,
- int src_idx, HOST_WIDE_INT offset)
+ int src_idx, HOST_WIDE_INT offset,
+ ipcp_value<valtype> **val_p,
+ bool unlimited)
{
- ipcp_value<valtype> *val;
+ ipcp_value<valtype> *val, *last_val = NULL;
+
+ if (val_p)
+ *val_p = NULL;
if (bottom)
return false;
- for (val = values; val; val = val->next)
+ for (val = values; val; last_val = val, val = val->next)
if (values_equal_for_ipcp_p (val->value, newval))
{
+ if (val_p)
+ *val_p = val;
+
if (ipa_edge_within_scc (cs))
{
ipcp_value_source<valtype> *s;
for (s = val->sources; s; s = s->next)
- if (s->cs == cs)
+ if (s->cs == cs && s->val == src_val)
break;
if (s)
return false;
return false;
}
- if (values_count == param_ipa_cp_value_list_size)
+ if (!unlimited && values_count == opt_for_fn (cs->caller->decl,
+ param_ipa_cp_value_list_size))
{
/* We can only free sources, not the values themselves, because sources
of other values in this SCC might point to them. */
ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
}
}
-
values = NULL;
return set_to_bottom ();
}
values_count++;
val = allocate_and_init_ipcp_value (newval);
val->add_source (cs, src_val, src_idx, offset);
- val->next = values;
- values = val;
+ val->next = NULL;
+
+ /* Add the new value to the end of the value list, which can reduce the
+ number of iterations of the propagation stage for recursive functions. */
+ if (last_val)
+ last_val->next = val;
+ else
+ values = val;
+
+ if (val_p)
+ *val_p = val;
+
+ return true;
+}
+
+/* Return true if the ipcp_value VAL originates from a parameter value of a
+ self-feeding recursive function via some kind of pass-through jump
+ function.
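+
+ As a purely illustrative sketch, in
+
+ fn (int i)
+ {
+ fn (i + 1);
+ }
+
+ starting from a constant 0 that arrives from elsewhere, the values 1, 2, ...
+ that the lattice of I receives through the (i + 1) pass-through of the
+ self-recursive edge are considered self-recursively generated. */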
+
+static bool
+self_recursively_generated_p (ipcp_value<tree> *val)
+{
+ class ipa_node_params *info = NULL;
+
+ for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
+ {
+ cgraph_edge *cs = src->cs;
+
+ if (!src->val || cs->caller != cs->callee->function_symbol ())
+ return false;
+
+ if (src->val == val)
+ continue;
+
+ if (!info)
+ info = IPA_NODE_REF (cs->caller);
+
+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
+ src->index);
+ ipcp_lattice<tree> *src_lat;
+ ipcp_value<tree> *src_val;
+
+ if (src->offset == -1)
+ src_lat = &plats->itself;
+ else
+ {
+ struct ipcp_agg_lattice *src_aglat;
+
+ for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next)
+ if (src_aglat->offset == src->offset)
+ break;
+
+ if (!src_aglat)
+ return false;
+
+ src_lat = src_aglat;
+ }
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ if (src_val == val)
+ break;
+
+ if (!src_val)
+ return false;
+ }
+
return true;
}
+/* A helper function that returns the result of the operation specified by
+ OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
+ type for the value of SRC_VAL. If the operation is binary, OPND2 is a
+ constant value acting as its second operand. If non-NULL, RES_TYPE is the
+ expected type of the result. */
+
+static tree
+get_val_across_arith_op (enum tree_code opcode,
+ tree opnd1_type,
+ tree opnd2,
+ ipcp_value<tree> *src_val,
+ tree res_type)
+{
+ tree opnd1 = src_val->value;
+
+ /* Skip source values that are incompatible with the specified type. */
+ if (opnd1_type
+ && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
+ return NULL_TREE;
+
+ return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+}
+
/* Propagate values through an arithmetic transformation described by a jump
function associated with edge CS, taking values from SRC_LAT and putting
them into DEST_LAT. OPND1_TYPE is the expected type for the values in SRC_LAT.
ipcp_value<tree> *src_val;
bool ret = false;
- /* Do not create new values when propagating within an SCC because if there
- are arithmetic functions with circular dependencies, there is infinite
- number of them and we would just make lattices bottom. If this condition
- is ever relaxed we have to detect self-feeding recursive calls in
- cgraph_edge_brings_value_p in a smarter way. */
+ /* Due to circular dependencies, propagating within an SCC through arithmetic
+ transformation would create an infinite number of values. But for a
+ self-feeding recursive function, we can allow propagation for a limited
+ number of rounds, which enables a simple kind of recursive function
+ versioning. In other scenarios, we would just make lattices bottom.
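+
+ As a purely illustrative sketch, a self-recursive call such as fn (i + 1)
+ seeded with fn (0) generates a chain of successive values 1, 2, ... whose
+ length is bounded by param_ipa_cp_max_recursive_depth, so clones
+ specialized for the first few recursion levels become possible. */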
if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
- ret = dest_lat->set_contains_variable ();
+ {
+ int i;
+
+ int max_recursive_depth = opt_for_fn (cs->caller->decl,
+ param_ipa_cp_max_recursive_depth);
+ if (src_lat != dest_lat || max_recursive_depth < 1)
+ return dest_lat->set_contains_variable ();
+
+ /* No benefit if recursive execution has low probability. */
+ if (cs->sreal_frequency () * 100
+ <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
+ param_ipa_cp_min_recursive_probability))
+ return dest_lat->set_contains_variable ();
+
+ auto_vec<ipcp_value<tree> *, 8> val_seeds;
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ {
+ /* Now we do not use self-recursively generated values as propagation
+ sources; this is absolutely conservative, but it avoids explosion of
+ a lattice's value space, especially when one recursive function calls
+ another recursive one. */
+ if (self_recursively_generated_p (src_val))
+ {
+ ipcp_value_source<tree> *s;
+
+ /* If the lattice has already been propagated for the call site,
+ no need to do that again. */
+ for (s = src_val->sources; s; s = s->next)
+ if (s->cs == cs)
+ return dest_lat->set_contains_variable ();
+ }
+ else
+ val_seeds.safe_push (src_val);
+ }
+
+ gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
+
+ /* Recursively generate lattice values with a limited count. */
+ FOR_EACH_VEC_ELT (val_seeds, i, src_val)
+ {
+ for (int j = 1; j < max_recursive_depth; j++)
+ {
+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+ src_val, res_type);
+ if (!cstval)
+ break;
+
+ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+ src_offset, &src_val, true);
+ gcc_checking_assert (src_val);
+ }
+ }
+ ret |= dest_lat->set_contains_variable ();
+ }
else
for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
- tree opnd1 = src_val->value;
- tree cstval = NULL_TREE;
-
- /* Skip source values that is incompatible with specified type. */
- if (!opnd1_type
- || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
- cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+ /* Now we do not use self-recursively generated values as propagation
+ sources; otherwise it is easy to overflow the value space of a normal
+ lattice. */
+ if (self_recursively_generated_p (src_val))
+ {
+ ret |= dest_lat->set_contains_variable ();
+ continue;
+ }
+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+ src_val, res_type);
if (cstval)
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
"param %i of %s is NULL or unsuitable for bits propagation\n",
- idx, cs->callee->name ());
+ idx, cs->callee->dump_name ());
return dest_lattice->set_to_bottom ();
}
value_range vr;
if (TREE_CODE_CLASS (operation) == tcc_unary)
- {
- ipa_vr_operation_and_type_effects (&vr,
- &src_lats->m_value_range.m_vr,
- operation, param_type,
- operand_type);
- }
+ ipa_vr_operation_and_type_effects (&vr,
+ &src_lats->m_value_range.m_vr,
+ operation, param_type,
+ operand_type);
/* A crude way to prevent unbounded number of value range updates
in SCC components. We should allow limited number of updates within
SCC, too. */
NOP_EXPR,
param_type,
jfunc->m_vr->type ()))
- vr.intersect (*jfunc->m_vr);
+ vr.intersect (jvr);
}
return dest_lat->meet_with (&vr);
}
unless there are too many already. If there are too many, return false. If
there are overlaps turn whole DEST_PLATS to bottom and return false. If any
skipped lattices were newly marked as containing variable, set *CHANGE to
- true. */
+ true. MAX_AGG_ITEMS is the maximum number of lattices. */
static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
struct ipcp_agg_lattice ***aglat,
- bool pre_existing, bool *change)
+ bool pre_existing, bool *change, int max_agg_items)
{
gcc_checking_assert (offset >= 0);
set_agg_lats_to_bottom (dest_plats);
return false;
}
- if (dest_plats->aggs_count == param_ipa_max_agg_items)
+ if (dest_plats->aggs_count == max_agg_items)
return false;
dest_plats->aggs_count++;
new_al = ipcp_agg_lattice_pool.allocate ();
ret |= set_agg_lats_contain_variable (dest_plats);
dst_aglat = &dest_plats->aggs;
+ int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
+ param_ipa_max_agg_items);
for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
src_aglat;
src_aglat = src_aglat->next)
if (new_offset < 0)
continue;
if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
- &dst_aglat, pre_existing, &ret))
+ &dst_aglat, pre_existing, &ret, max_agg_items))
{
struct ipcp_agg_lattice *new_al = *dst_aglat;
if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
return true;
+ int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
+ param_ipa_max_agg_items);
FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
{
HOST_WIDE_INT val_size;
val_size = tree_to_shwi (TYPE_SIZE (item->type));
if (merge_agg_lats_step (dest_plats, item->offset, val_size,
- &aglat, pre_existing, &ret))
+ &aglat, pre_existing, &ret, max_agg_items))
{
ret |= propagate_aggregate_lattice (cs, item, *aglat);
aglat = &(*aglat)->next;
if (avail < AVAIL_AVAILABLE)
continue;
isummary = ipa_fn_summaries->get (callee);
- if (!isummary->inlinable)
+ if (!isummary || !isummary->inlinable)
continue;
int size = ipa_size_summaries->get (callee)->size;
/* Return time bonus incurred because of HINTS. */
static int
-hint_time_bonus (ipa_hints hints)
+hint_time_bonus (cgraph_node *node, ipa_hints hints)
{
int result = 0;
if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
- result += param_ipa_cp_loop_hint_bonus;
+ result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
return result;
}
cloning goodness evaluation, do so. */
static inline int64_t
-incorporate_penalties (ipa_node_params *info, int64_t evaluation)
+incorporate_penalties (cgraph_node *node, ipa_node_params *info,
+ int64_t evaluation)
{
- if (info->node_within_scc)
+ if (info->node_within_scc && !info->node_is_self_scc)
evaluation = (evaluation
- * (100 - param_ipa_cp_recursion_penalty)) / 100;
+ * (100 - opt_for_fn (node->decl,
+ param_ipa_cp_recursion_penalty))) / 100;
if (info->node_calling_single_call)
evaluation = (evaluation
- * (100 - param_ipa_cp_single_call_penalty))
+ * (100 - opt_for_fn (node->decl,
+ param_ipa_cp_single_call_penalty)))
/ 100;
return evaluation;
gcc_assert (size_cost > 0);
class ipa_node_params *info = IPA_NODE_REF (node);
+ int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
if (max_count > profile_count::zero ())
{
int factor = RDIV (count_sum.probability_in
* 1000, REG_BR_PROB_BASE);
int64_t evaluation = (((int64_t) time_benefit * factor)
/ size_cost);
- evaluation = incorporate_penalties (info, evaluation);
+ evaluation = incorporate_penalties (node, info, evaluation);
if (dump_file && (dump_flags & TDF_DETAILS))
{
count_sum.dump (dump_file);
fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
", threshold: %i\n",
- info->node_within_scc ? ", scc" : "",
+ info->node_within_scc
+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
- evaluation, param_ipa_cp_eval_threshold);
+ evaluation, eval_threshold);
}
- return evaluation >= param_ipa_cp_eval_threshold;
+ return evaluation >= eval_threshold;
}
else
{
int64_t evaluation = (((int64_t) time_benefit * freq_sum)
/ size_cost);
- evaluation = incorporate_penalties (info, evaluation);
+ evaluation = incorporate_penalties (node, info, evaluation);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
"size: %i, freq_sum: %i%s%s) -> evaluation: "
"%" PRId64 ", threshold: %i\n",
time_benefit, size_cost, freq_sum,
- info->node_within_scc ? ", scc" : "",
+ info->node_within_scc
+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
- evaluation, param_ipa_cp_eval_threshold);
+ evaluation, eval_threshold);
- return evaluation >= param_ipa_cp_eval_threshold;
+ return evaluation >= eval_threshold;
}
}
time_benefit = base_time.to_int ()
+ devirtualization_time_bonus (node, known_csts, known_contexts,
known_aggs)
- + hint_time_bonus (hints)
+ + hint_time_bonus (node, hints)
+ removable_params_cost + est_move_cost;
gcc_checking_assert (size >=0);
val->local_size_cost = size;
}
+/* Get the overall limit of growth based on parameters extracted from NODE.
+ It does not really make sense to mix functions with different overall growth
+ limits, but it is possible, and if it happens, we do not want to select one
+ limit at random.
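+
+ As a purely illustrative computation: with an orig_overall_size of 20000,
+ param_large_unit_insns of 10000 and param_ipa_cp_unit_growth of 10, the
+ returned limit is 20000 + 20000 * 10 / 100 + 1 = 22001. */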
+
+static long
+get_max_overall_size (cgraph_node *node)
+{
+ long max_new_size = orig_overall_size;
+ long large_unit = opt_for_fn (node->decl, param_large_unit_insns);
+ if (max_new_size < large_unit)
+ max_new_size = large_unit;
+ int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
+ max_new_size += max_new_size * unit_growth / 100 + 1;
+ return max_new_size;
+}
+
/* Iterate over known values of parameters of NODE and estimate the local
effects in terms of time and size they have. */
known_aggs, &size, &time,
&base_time, &hints);
time -= devirt_bonus;
- time -= hint_time_bonus (hints);
+ time -= hint_time_bonus (node, hints);
time -= removable_params_cost;
size -= stats.n_calls * removable_params_cost;
stats.freq_sum, stats.count_sum,
size))
{
- if (size + overall_size <= max_new_size)
+ if (size + overall_size <= get_max_overall_size (node))
{
info->do_clone_for_all_contexts = true;
overall_size += size;
"known contexts, growth deemed beneficial.\n");
}
else if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " Not cloning for all contexts because "
- "max_new_size would be reached with %li.\n",
+ fprintf (dump_file, " Not cloning for all contexts because "
+ "maximum unit size would be reached with %li.\n",
size + overall_size);
}
else if (dump_file && (dump_flags & TDF_DETAILS))
while (v)
{
struct cgraph_edge *cs;
+ class ipa_node_params *info = NULL;
+ bool self_scc = true;
for (cs = v->callees; cs; cs = cs->next_callee)
if (ipa_edge_within_scc (cs))
{
- IPA_NODE_REF (v)->node_within_scc = true;
+ cgraph_node *callee = cs->callee->function_symbol ();
+
+ if (v != callee)
+ self_scc = false;
+
+ if (!info)
+ {
+ info = IPA_NODE_REF (v);
+ info->node_within_scc = true;
+ }
+
if (propagate_constants_across_call (cs))
- push_node_to_stack (topo, cs->callee->function_symbol ());
+ push_node_to_stack (topo, callee);
}
+
+ if (info)
+ info->node_is_self_scc = self_scc;
+
v = pop_node_from_stack (topo);
}
max_count = max_count.max (node->count.ipa ());
}
- max_new_size = overall_size;
- if (max_new_size < param_large_unit_insns)
- max_new_size = param_large_unit_insns;
- max_new_size += max_new_size * param_ipcp_unit_growth / 100 + 1;
+ orig_overall_size = overall_size;
if (dump_file)
- fprintf (dump_file, "\noverall_size: %li, max_new_size: %li\n",
- overall_size, max_new_size);
+ fprintf (dump_file, "\noverall_size: %li\n", overall_size);
propagate_constants_topo (topo);
if (flag_checking)
src_data->next_clone = dst_edge;
}
-/* Return true is NODE is DEST or its clone for all contexts. */
+/* Return true if CS calls DEST or its clone for all contexts. When
+ ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
+ edges from/to an all-context clone. */
static bool
-same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
+calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
+ bool allow_recursion_to_clone)
{
- if (node == dest)
+ enum availability availability;
+ cgraph_node *callee = cs->callee->function_symbol (&availability);
+
+ if (availability <= AVAIL_INTERPOSABLE)
+ return false;
+ if (callee == dest)
return true;
+ if (!allow_recursion_to_clone && cs->caller == callee)
+ return false;
- class ipa_node_params *info = IPA_NODE_REF (node);
+ class ipa_node_params *info = IPA_NODE_REF (callee);
return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
cgraph_node *dest, ipcp_value<tree> *dest_val)
{
class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
- enum availability availability;
- cgraph_node *real_dest = cs->callee->function_symbol (&availability);
- if (availability <= AVAIL_INTERPOSABLE
- || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+ if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
|| caller_info->node_dead)
return false;
}
else
{
- /* At the moment we do not propagate over arithmetic jump functions in
- SCCs, so it is safe to detect self-feeding recursive calls in this
- way. */
if (src->val == dest_val)
return true;
ipcp_value<ipa_polymorphic_call_context> *)
{
class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
- enum availability avail;
- cgraph_node *real_dest = cs->callee->function_symbol (&avail);
- if (avail <= AVAIL_INTERPOSABLE
- || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+ if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
|| caller_info->node_dead)
return false;
if (!src->val)
*freq_sum = freq;
*count_sum = cnt;
*caller_count = count;
+
+ if (!hot && IPA_NODE_REF (dest)->node_within_scc)
+ {
+ struct cgraph_edge *cs;
+
+ /* A cold non-SCC source edge could trigger hot recursive execution of
+ the function. Consider the case as hot and rely on the following cost
+ model computation to further select the right one. */
+ for (cs = dest->callers; cs; cs = cs->next_caller)
+ if (cs->caller == dest && cs->maybe_hot_p ())
+ return true;
+ }
+
return hot;
}
+/* Given a NODE and a set of its CALLERS, try to adjust the order of the
+ callers so that a non-self-recursive caller becomes the first element.
+ Thus, we can simplify intersecting operations on values that arrive from
+ all of these callers, especially when there exists a self-recursive call.
+ Return true if this kind of adjustment is possible.
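+
+ For example (purely illustrative), if CALLERS of a self-recursive NODE is
+ { rec-edge, rec-edge, ext-edge }, it becomes { ext-edge, rec-edge,
+ rec-edge }, so the intersection can start from an edge whose values do not
+ depend on the result currently being computed. */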
+
+static bool
+adjust_callers_for_value_intersection (vec<cgraph_edge *> callers,
+ cgraph_node *node)
+{
+ for (unsigned i = 0; i < callers.length (); i++)
+ {
+ cgraph_edge *cs = callers[i];
+
+ if (cs->caller != node)
+ {
+ if (i > 0)
+ {
+ callers[i] = callers[0];
+ callers[0] = cs;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/* Return a vector of incoming edges that do bring value VAL to node DEST. It
is assumed their number is known and equal to CALLER_COUNT. */
}
}
+ if (caller_count > 1)
+ adjust_callers_for_value_intersection (ret, dest);
+
return ret;
}
{
struct ipa_replace_map *replace_map;
-
replace_map = ggc_alloc<ipa_replace_map> ();
if (dump_file)
{
for (cs = new_node->callees; cs; cs = cs->next_callee)
{
fprintf (dump_file, " edge to %s has count ",
- cs->callee->name ());
+ cs->callee->dump_name ());
cs->count.dump (dump_file);
fprintf (dump_file, "\n");
}
for (cs = orig_node->callees; cs; cs = cs->next_callee)
{
fprintf (dump_file, " edge to %s is left with ",
- cs->callee->name ());
+ cs->callee->dump_name ());
cs->count.dump (dump_file);
fprintf (dump_file, "\n");
}
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
- new_sum.ipa ());
+
+ /* With a partial training run we do not want to assume that the
+ original's count is zero whenever we redirect all executed edges to the
+ clone. Simply drop the profile to a local one in this case. */
+ if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
+ && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
+ && flag_profile_partial_training)
+ remainder = remainder.guessed_local ();
+
new_sum = orig_node_count.combine_with_ipa_count (new_sum);
new_node->count = new_sum;
orig_node->count = remainder;
return new_node;
}
-/* Return true, if JFUNC, which describes a i-th parameter of call CS, is a
- simple no-operation pass-through function to itself. */
+/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
+ pass-through function to itself when the cgraph_node involved is not an
+ IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
+ no-operation pass-through. */
static bool
-self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
+self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
+ bool simple = true)
{
enum availability availability;
if (cs->caller == cs->callee->function_symbol (&availability)
&& availability > AVAIL_INTERPOSABLE
&& jfunc->type == IPA_JF_PASS_THROUGH
- && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
- && ipa_get_jf_pass_through_formal_id (jfunc) == i)
+ && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
+ && ipa_get_jf_pass_through_formal_id (jfunc) == i
+ && IPA_NODE_REF (cs->caller)
+ && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
+ return true;
+ return false;
+}
+
+/* Return true if JFUNC, which describes a part of an aggregate represented
+ or pointed to by the i-th parameter of call CS, is a pass-through function
+ to itself when the cgraph_node involved is not an IPA-CP clone. When
+ SIMPLE is true, further check if JFUNC is a simple no-operation
+ pass-through. */
+
+static bool
+self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
+ int i, bool simple = true)
+{
+ enum availability availability;
+ if (cs->caller == cs->callee->function_symbol (&availability)
+ && availability > AVAIL_INTERPOSABLE
+ && jfunc->jftype == IPA_JF_LOAD_AGG
+ && jfunc->offset == jfunc->value.load_agg.offset
+ && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
+ && jfunc->value.pass_through.formal_id == i
+ && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
+ && IPA_NODE_REF (cs->caller)
+ && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
return true;
return false;
}
struct ipa_jump_func *jump_func;
tree t;
- if (IPA_NODE_REF (cs->caller) && IPA_NODE_REF (cs->caller)->node_dead)
- continue;
-
if (!IPA_EDGE_REF (cs)
|| i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
|| (i == 0
break;
}
jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
- if (self_recursive_pass_through_p (cs, jump_func, i))
- continue;
- t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func, type);
+ /* Besides a simple pass-through jump function, an arithmetic jump
+ function could also introduce an argument-direct-pass-through for a
+ self-feeding recursive call. For example,
+
+ fn (int i)
+ {
+ fn (i & 1);
+ }
+
+ Given that i is 0, recursive propagation via (i & 1) also gets
+ 0. */
+ if (self_recursive_pass_through_p (cs, jump_func, i, false))
+ {
+ gcc_assert (newval);
+ t = ipa_get_jf_arith_result (
+ ipa_get_jf_pass_through_operation (jump_func),
+ newval,
+ ipa_get_jf_pass_through_operand (jump_func),
+ type);
+ }
+ else
+ t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func,
+ type);
if (!t
|| (newval
&& !values_equal_for_ipcp_p (t, newval))
break;
if (aglat->offset - offset == item->offset)
{
- gcc_checking_assert (item->value);
- if (aglat->is_single_const ()
- && values_equal_for_ipcp_p (item->value,
- aglat->values->value))
- found = true;
+ if (aglat->is_single_const ())
+ {
+ tree value = aglat->values->value;
+
+ if (values_equal_for_ipcp_p (item->value, value))
+ found = true;
+ }
break;
}
aglat = aglat->next;
{
struct ipa_agg_value agg_value;
- agg_value.offset = agg_item->offset;
agg_value.value = value;
-
+ agg_value.offset = agg_item->offset;
inter.safe_push (agg_value);
}
}
break;
if (ti->offset == item->offset)
{
- tree value = ipa_agg_value_from_node (caller_info,
- cs->caller, ti);
- if (value
- && values_equal_for_ipcp_p (item->value, value))
+ tree value;
+
+ /* Besides a simple pass-through aggregate jump function, an
+ arithmetic aggregate jump function could also bring the same
+ aggregate value as the passed-in parameter for a self-feeding
+ recursive call. For example,
+
+ fn (int *i)
+ {
+ int j = *i & 1;
+ fn (&j);
+ }
+
+ Given that *i is 0, recursive propagation via (*i & 1)
+ also gets 0. */
+ if (self_recursive_agg_pass_through_p (cs, ti, index,
+ false))
+ value = ipa_get_jf_arith_result (
+ ti->value.pass_through.operation,
+ item->value,
+ ti->value.pass_through.operand,
+ ti->type);
+ else
+ value = ipa_agg_value_from_node (caller_info,
+ cs->caller, ti);
+
+ if (value && values_equal_for_ipcp_p (item->value, value))
found = true;
break;
}
for (i = 0; i < count; i++)
{
- static vec<ipa_agg_value> values = vNULL;
class ipcp_param_lattices *plats;
bool interesting = false;
for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
if (plats->aggs_bottom)
return false;
- values = intersect_aggregates_with_edge (cs, i, values);
+ vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
if (!values.exists ())
return false;
return false;
}
}
+ values.release ();
}
return true;
}
perhaps_add_new_callers (node, val);
return false;
}
- else if (val->local_size_cost + overall_size > max_new_size)
+ else if (val->local_size_cost + overall_size > get_max_overall_size (node))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Ignoring candidate value because "
- "max_new_size would be reached with %li.\n",
+ "maximum unit size would be reached with %li.\n",
val->local_size_cost + overall_size);
return false;
}
if (info->do_clone_for_all_contexts)
{
struct cgraph_node *clone;
- vec<cgraph_edge *> callers;
+ vec<cgraph_edge *> callers = node->collect_callers ();
+
+ for (int i = callers.length () - 1; i >= 0; i--)
+ {
+ cgraph_edge *cs = callers[i];
+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+
+ if (caller_info && caller_info->node_dead)
+ callers.unordered_remove (i);
+ }
+
+ if (!adjust_callers_for_value_intersection (callers, node))
+ {
+ /* If the node is not called by anyone, or all its caller edges are
+ self-recursive, the node is not really in use, so there is no need
+ to do cloning. */
+ callers.release ();
+ known_csts.release ();
+ known_contexts.release ();
+ info->do_clone_for_all_contexts = false;
+ return ret;
+ }
if (dump_file)
fprintf (dump_file, " - Creating a specialized node of %s "
"for all known contexts.\n", node->dump_name ());
- callers = node->collect_callers ();
find_more_scalar_values_for_callers_subset (node, known_csts, callers);
find_more_contexts_for_caller_subset (node, &known_contexts, callers);
ipa_agg_replacement_value *aggvals
if (dump_file)
fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
"; -fipa-bit-cp: disabled.\n",
- node->name ());
+ node->dump_name ());
continue;
}
ipa_node_params *info = IPA_NODE_REF (node);
bool found_useful_result = false;
- if (!opt_for_fn (node->decl, flag_ipa_vrp))
+ if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
{
if (dump_file)
fprintf (dump_file, "Not considering %s for VR discovery "
"and propagate; -fipa-ipa-vrp: disabled.\n",
- node->name ());
+ node->dump_name ());
continue;
}
{
max_count = profile_count::uninitialized ();
overall_size = 0;
- max_new_size = 0;
+ orig_overall_size = 0;
ipcp_free_transformation_sum ();
}