/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
/* dom2 optimizes out the redundant test for loop invariant v/q
which leads to inconsistent profile. */
-/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { xfail *-*-* }} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
true otherwise.
ELIMINATED_EDGE is an edge that is known to be removed in the dupicated
- region. */
+ region. ORIG_ELIMINATED_EDGES, if non-NULL is set of edges known to be
+ removed from the original region. */
bool
gimple_duplicate_sese_region (edge entry, edge exit,
basic_block *region, unsigned n_region,
basic_block *region_copy,
bool update_dominance,
- edge eliminated_edge)
+ edge eliminated_edge,
+ hash_set <edge> *orig_eliminated_edges)
{
unsigned i;
bool free_region_copy = false, copying_header = false;
split_edge_bb_loc (entry), update_dominance);
if (total_count.initialized_p () && entry_count.initialized_p ())
{
- if (!eliminated_edge)
+ if (!eliminated_edge
+ && (!orig_eliminated_edges || orig_eliminated_edges->is_empty ()))
{
scale_bbs_frequencies_profile_count (region, n_region,
total_count - entry_count,
if (cond1) <- this condition will become false
and we update probabilities
goto loop_exit;
- if (cond2)
+ if (cond2) <- this condition is loop invariant
goto loop_exit;
goto loop_header <- this will be redirected to loop.
// region_copy_end
if (cond1) <- we need to update probabbility here
goto loop_exit;
if (cond2) <- and determine scaling factor here.
+ moreover cond2 is now always true
goto loop_exit;
else
goto loop;
but only consumer so far is tree-ssa-loop-ch and it uses only this
to handle the common case of peeling headers which have
conditionals known to be always true upon entry. */
- gcc_assert (eliminated_edge->src == region[0]
- && EDGE_COUNT (region[0]->succs) == 2
- && copying_header);
-
- edge e, e_copy, eliminated_edge_copy;
- if (EDGE_SUCC (region[0], 0) == eliminated_edge)
- {
- e = EDGE_SUCC (region[0], 1);
- e_copy = EDGE_SUCC (region_copy[0], 1);
- eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0);
- }
- else
+ gcc_checking_assert (copying_header);
+ for (unsigned int i = 0; i < n_region; i++)
{
- e = EDGE_SUCC (region[0], 0);
- e_copy = EDGE_SUCC (region_copy[0], 0);
- eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1);
- }
- gcc_checking_assert (e != e_copy
- && eliminated_edge_copy != eliminated_edge
- && eliminated_edge_copy->dest
- == eliminated_edge->dest);
-
+ edge exit_e, exit_e_copy, e, e_copy;
+ if (EDGE_COUNT (region[i]->succs) == 1)
+ {
+ region_copy[i]->count = entry_count;
+ region[i]->count -= entry_count;
+ continue;
+ }
- /* Handle first basic block in duplicated region as in the
- non-eliminating case. */
- scale_bbs_frequencies_profile_count (region_copy, n_region,
- entry_count, total_count);
- /* Now update redirecting eliminated edge to the other edge.
- Actual CFG update is done by caller. */
- e_copy->probability = profile_probability::always ();
- eliminated_edge_copy->probability = profile_probability::never ();
- /* Header copying is a special case of jump threading, so use
- common code to update loop body exit condition. */
- update_bb_profile_for_threading (region[0], e_copy->count (), e);
- /* If we duplicated more conditionals first scale the profile of
- rest of the preheader. Then work out the probability of
- entering the loop and scale rest of the loop. */
- if (n_region > 1)
- {
- scale_bbs_frequencies_profile_count (region_copy + 1,
- n_region - 1,
- e_copy->count (),
- region_copy[1]->count);
- scale_bbs_frequencies_profile_count (region + 1, n_region - 1,
- e->count (),
- region[1]->count);
+ gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
+ if (loop_exit_edge_p (region[0]->loop_father,
+ EDGE_SUCC (region[i], 0)))
+ {
+ exit_e = EDGE_SUCC (region[i], 0);
+ exit_e_copy = EDGE_SUCC (region_copy[i], 0);
+ e = EDGE_SUCC (region[i], 1);
+ e_copy = EDGE_SUCC (region_copy[i], 1);
+ }
+ else
+ {
+ exit_e = EDGE_SUCC (region[i], 1);
+ exit_e_copy = EDGE_SUCC (region_copy[i], 1);
+ e = EDGE_SUCC (region[i], 0);
+ e_copy = EDGE_SUCC (region_copy[i], 0);
+ }
+ gcc_assert (i == n_region - 1
+ || (e->dest == region[i + 1]
+ && e_copy->dest == region_copy[i + 1]));
+ region_copy[i]->count = entry_count;
+ profile_count exit_e_count = exit_e->count ();
+ if (eliminated_edge == exit_e)
+ {
+ /* Update profile and the conditional.
+ CFG update is done by caller. */
+ e_copy->probability = profile_probability::always ();
+ exit_e_copy->probability = profile_probability::never ();
+ gcond *cond_stmt
+ = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
+ if (e_copy->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_true (cond_stmt);
+ else
+ gimple_cond_make_false (cond_stmt);
+ update_stmt (cond_stmt);
+ /* Header copying is a special case of jump threading, so use
+ common code to update loop body exit condition. */
+ update_bb_profile_for_threading (region[i], entry_count, e);
+ eliminated_edge = NULL;
+ }
+ else
+ region[i]->count -= region_copy[i]->count;
+ if (orig_eliminated_edges->contains (exit_e))
+ {
+ orig_eliminated_edges->remove (exit_e);
+ /* All exits will happen in exit_e_copy which is out of the
+ loop, so increase probability accordingly.
+ If the edge is eliminated_edge we already corrected
+ profile above. */
+ if (entry_count.nonzero_p () && eliminated_edge != exit_e)
+ set_edge_probability_and_rescale_others
+ (exit_e_copy, exit_e_count.probability_in
+ (entry_count));
+ /* Eliminate in-loop conditional. */
+ e->probability = profile_probability::always ();
+ exit_e->probability = profile_probability::never ();
+ gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
+ if (e->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_true (cond_stmt);
+ else
+ gimple_cond_make_false (cond_stmt);
+ update_stmt (cond_stmt);
+ }
+ entry_count = e_copy->count ();
}
+ /* Be sure that we seen all edges we are supposed to update. */
+ gcc_checking_assert (!eliminated_edge
+ && orig_eliminated_edges->is_empty ());
}
}
extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
extern basic_block split_edge_bb_loc (edge);
extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
- basic_block *, bool, edge);
+ basic_block *, bool, edge,
+ hash_set <edge> *);
extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
basic_block *);
extern void gather_blocks_in_sese_region (basic_block entry, basic_block exit,
return r == desired_static_range ? ret_e : NULL;
}
+/* Return true if OP is invariant. */
+
+static bool
+loop_invariant_op_p (class loop *loop,
+ tree op)
+{
+ if (is_gimple_min_invariant (op))
+ return true;
+ if (SSA_NAME_IS_DEFAULT_DEF (op)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
+ return true;
+ return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
+}
+
+/* Return true if OP looks like it is derived from IV. */
+
+static bool
+loop_iv_derived_p (class loop *loop,
+ tree op)
+{
+ /* Always check for invariant first. */
+ gcc_checking_assert (!is_gimple_min_invariant (op)
+ && !SSA_NAME_IS_DEFAULT_DEF (op)
+ && flow_bb_inside_loop_p (loop,
+ gimple_bb (SSA_NAME_DEF_STMT (op))));
+ return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
+}
+
/* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
instructions should be duplicated, limit is decreased by the actual
amount. */
static bool
should_duplicate_loop_header_p (basic_block header, class loop *loop,
- int *limit)
+ int *limit,
+ hash_set <edge> *invariant_exits)
{
gimple_stmt_iterator bsi;
for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
gsi_next (&psi))
- {
- gphi *phi = psi.phi ();
- tree res = gimple_phi_result (phi);
- if (INTEGRAL_TYPE_P (TREE_TYPE (res))
- || POINTER_TYPE_P (TREE_TYPE (res)))
- gimple_set_uid (phi, 1 /* IV */);
- else
- gimple_set_uid (phi, 0);
- }
+ /* If this is actual loop header PHIs indicate that the SSA_NAME
+ may be IV. Otherwise just give up. */
+ if (header == loop->header)
+ {
+ gphi *phi = psi.phi ();
+ tree res = gimple_phi_result (phi);
+ if (INTEGRAL_TYPE_P (TREE_TYPE (res))
+ || POINTER_TYPE_P (TREE_TYPE (res)))
+ gimple_set_uid (phi, 1 /* IV */);
+ else
+ gimple_set_uid (phi, 0);
+ }
+ else
+ gimple_set_uid (psi.phi (), 0);
/* Count number of instructions and punt on calls.
Populate stmts INV/IV flag to later apply heuristics to the
/* Classify the stmt based on whether its computation is based
on a IV or whether it is invariant in the loop. */
gimple_set_uid (last, 0);
- if (!gimple_vuse (last))
+ if (!gimple_vuse (last)
+ && gimple_code (last) != GIMPLE_ASM
+ && (gimple_code (last) != GIMPLE_CALL
+ || gimple_call_flags (last) & ECF_CONST))
{
bool inv = true;
- bool iv = false;
+ bool iv = true;
ssa_op_iter i;
tree op;
FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
- if (!SSA_NAME_IS_DEFAULT_DEF (op)
- && flow_bb_inside_loop_p (loop,
- gimple_bb (SSA_NAME_DEF_STMT (op))))
+ if (!loop_invariant_op_p (loop, op))
{
- if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */))
+ if (!loop_iv_derived_p (loop, op))
+ {
+ inv = false;
+ iv = false;
+ break;
+ }
+ else
inv = false;
- if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */)
- iv = true;
}
gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0));
}
the loop further. Unless this is the original loop header. */
tree lhs = gimple_cond_lhs (last);
tree rhs = gimple_cond_rhs (last);
+ bool lhs_invariant = loop_invariant_op_p (loop, lhs);
+ bool rhs_invariant = loop_invariant_op_p (loop, rhs);
+ if (lhs_invariant && rhs_invariant)
+ {
+ if (invariant_exits)
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, header->succs)
+ if (loop_exit_edge_p (loop, e))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ " Will elliminate invariant exit %i->%i\n",
+ e->src->index, e->dest->index);
+ invariant_exits->add (e);
+ }
+ }
+ return true;
+ }
+ /* TODO: This is heuristics that claims that IV based ocnditionals will
+ likely be optimized out in duplicated header. We could use ranger
+ query instead to tell this more precisely. */
if (header != loop->header
- && ((TREE_CODE (lhs) == SSA_NAME
- && !SSA_NAME_IS_DEFAULT_DEF (lhs)
- && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs)))
- && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0)
- || (TREE_CODE (rhs) == SSA_NAME
- && !SSA_NAME_IS_DEFAULT_DEF (rhs)
- && flow_bb_inside_loop_p (loop,
- gimple_bb (SSA_NAME_DEF_STMT (rhs)))
- && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0)))
+ && ((!lhs_invariant && !loop_iv_derived_p (loop, lhs))
+ || (!rhs_invariant && !loop_iv_derived_p (loop, rhs))))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
continue;
}
- if (should_duplicate_loop_header_p (header, loop, &remaining_limit))
+ if (should_duplicate_loop_header_p (header, loop, &remaining_limit, NULL))
candidates.safe_push ({loop, static_exit});
}
/* Do not use ranger after we change the IL and not have updated SSA. */
profile_count entry_count = profile_count::zero ();
edge e;
edge_iterator ei;
+ hash_set <edge> invariant_exits;
FOR_EACH_EDGE (e, ei, loop->header->preds)
if (e->src != loop->latch)
entry_count += e->count ();
- while (should_duplicate_loop_header_p (header, loop, &remaining_limit))
+ while (should_duplicate_loop_header_p (header, loop, &remaining_limit,
+ &invariant_exits))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Will duplicate bb %i\n", header->index);
propagate_threaded_block_debug_into (exit->dest, entry->dest);
if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
- true, candidate.static_exit))
+ true, candidate.static_exit,
+ &invariant_exits))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Duplication failed.\n");