free (bbs);
}
+/* Scales the frequencies of all basic blocks in LOOP that are strictly
+ dominated by BB by NUM/DEN. */
+
+void
+scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+ profile_count num, profile_count den)
+{
+ basic_block son;
+
+ if (!den.nonzero_p () && !(num == profile_count::zero ()))
+ return;
+ auto_vec <basic_block, 8> worklist;
+ worklist.safe_push (bb);
+
+ while (!worklist.is_empty ())
+ for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ if (!flow_bb_inside_loop_p (loop, son))
+ continue;
+ son->count = son->count.apply_scale (num, den);
+ worklist.safe_push (son);
+ }
+}
+
/* Scale profile in LOOP by P.
If ITERATION_BOUND is not -1, scale even further if loop is predicted
to iterate too many times.
if (other_edge && other_edge->dest == loop->latch)
loop->latch->count -= new_exit_count - old_exit_count;
else
- {
- basic_block *body = get_loop_body (loop);
- profile_count new_count = exit_edge->src->count - new_exit_count;
- profile_count old_count = exit_edge->src->count - old_exit_count;
-
- for (unsigned int i = 0; i < loop->num_nodes; i++)
- if (body[i] != exit_edge->src
- && dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
- body[i]->count = body[i]->count.apply_scale (new_count,
- old_count);
-
- free (body);
- }
+ scale_dominated_blocks_in_loop (loop, exit_edge->src,
+ exit_edge->src->count - new_exit_count,
+ exit_edge->src->count - old_exit_count);
}
else if (dump_file && (dump_flags & TDF_DETAILS))
{
should've managed the flags so all except for original loop
has won't exist set. */
scale_act = wanted_count.probability_in (count_in);
+
/* Now simulate the duplication adjustments and compute header
frequency of the last copy. */
for (i = 0; i < ndupl; i++)
profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
? prob_pass_wont_exit
: prob_pass_thru;
- profile_probability p = prob_pass_main;
- profile_count scale_main_den = count_in;
- for (i = 0; i < ndupl; i++)
+ if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
{
- scale_main_den += count_in.apply_probability (p);
- p = p * scale_step[i];
+ profile_probability p = prob_pass_main;
+ profile_count scale_main_den = count_in;
+ for (i = 0; i < ndupl; i++)
+ {
+ scale_main_den += count_in.apply_probability (p);
+ p = p * scale_step[i];
+ }
+ /* If original loop is executed COUNT_IN times, the unrolled
+ loop will account SCALE_MAIN_DEN times. */
+ scale_main = count_in.probability_in (scale_main_den);
}
- /* If original loop is executed COUNT_IN times, the unrolled
- loop will account SCALE_MAIN_DEN times. */
- scale_main = count_in.probability_in (scale_main_den);
+ else
+ scale_main = profile_probability::always ();
scale_act = scale_main * prob_pass_main;
}
else
*exit_bound = bound;
}
-/* Scales the frequencies of all basic blocks in LOOP that are strictly
- dominated by BB by NUM/DEN. */
-
-static void
-scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
- profile_count num, profile_count den)
-{
- basic_block son;
-
- if (!den.nonzero_p () && !(num == profile_count::zero ()))
- return;
-
- for (son = first_dom_son (CDI_DOMINATORS, bb);
- son;
- son = next_dom_son (CDI_DOMINATORS, son))
- {
- if (!flow_bb_inside_loop_p (loop, son))
- continue;
- scale_bbs_frequencies_profile_count (&son, 1, num, den);
- scale_dominated_blocks_in_loop (loop, son, num, den);
- }
-}
-
-/* Return estimated niter for LOOP after unrolling by FACTOR times. */
-
-gcov_type
-niter_for_unrolled_loop (class loop *loop, unsigned factor)
-{
- gcc_assert (factor != 0);
- bool profile_p = false;
- gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
- /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
- "+ 1" converts latch iterations to loop iterations and the "- 1"
- converts back. */
- gcov_type new_est_niter = est_niter / factor;
-
- if (est_niter == -1)
- return -1;
-
- /* Without profile feedback, loops for which we do not know a better estimate
- are assumed to roll 10 times. When we unroll such loop, it appears to
- roll too little, and it may even seem to be cold. To avoid this, we
- ensure that the created loop appears to roll at least 5 times (but at
- most as many times as before unrolling). Don't do adjustment if profile
- feedback is present. */
- if (new_est_niter < 5 && !profile_p)
- {
- if (est_niter < 5)
- new_est_niter = est_niter;
- else
- new_est_niter = 5;
- }
-
- if (loop->any_upper_bound)
- {
- /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */
- widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
- factor);
- if (wi::ltu_p (bound, new_est_niter))
- new_est_niter = bound.to_uhwi ();
- }
-
- return new_est_niter;
-}
-
/* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
whose source block dominates the latch. DESC describes the number of
iterations of LOOP.
transform_callback transform,
void *data)
{
- gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
enum tree_code exit_cmp;
tree enter_main_cond, exit_base, exit_step, exit_bound;
+ bool flat = maybe_flat_loop_profile (loop);
determine_exit_conditions (loop, desc, factor,
&enter_main_cond, &exit_base, &exit_step,
&exit_cmp, &exit_bound);
bool single_loop_p = !exit_base;
- /* Let us assume that the unrolled loop is quite likely to be entered. */
- profile_probability prob_entry;
- if (integer_nonzerop (enter_main_cond))
- prob_entry = profile_probability::always ();
- else
- prob_entry = profile_probability::guessed_always ()
- .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
-
gcond *exit_if = nullptr;
class loop *new_loop = nullptr;
edge new_exit;
if (!single_loop_p)
{
- edge exit = single_dom_exit (loop);
+ profile_count entry_count = loop_preheader_edge (loop)->src->count;
+ /* Let us assume that the unrolled loop is quite likely to be entered. */
+ profile_probability prob_entry;
+ if (integer_nonzerop (enter_main_cond))
+ prob_entry = profile_probability::always ();
+ else
+ prob_entry = profile_probability::guessed_always ()
+ .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
+
/* The values for scales should keep profile consistent, and somewhat
- close to correct.
-
- TODO: The current value of SCALE_REST makes it appear that the loop
- that is created by splitting the remaining iterations of the unrolled
- loop is executed the same number of times as the original loop, and
- with the same frequencies, which is obviously wrong. This does not
- appear to cause problems, so we do not bother with fixing it for now.
- To make the profile correct, we would need to change the probability
- of the exit edge of the loop, and recompute the distribution of
- frequencies in its body because of this change (scale the frequencies
- of blocks before and after the exit by appropriate factors). */
- profile_probability scale_unrolled = prob_entry;
+ close to correct. */
new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
- prob_entry.invert (), scale_unrolled,
- profile_probability::guessed_always (),
+ prob_entry.invert (),
+ prob_entry,
+ /* We will later redirect exit from vectorized
+ loop to new_loop. */
+ profile_probability::always (),
true);
gcc_assert (new_loop != NULL);
update_ssa (TODO_update_ssa_no_phi);
edge precond_edge = single_pred_edge (rest);
split_edge (loop_latch_edge (loop));
basic_block exit_bb = single_pred (loop->latch);
+ edge exit = single_dom_exit (loop);
/* Since the exit edge will be removed, the frequency of all the blocks
- in the loop that are dominated by it must be scaled by
- 1 / (1 - exit->probability). */
+ in the loop that are dominated by it must be scaled. */
if (exit->probability.initialized_p ())
scale_dominated_blocks_in_loop (loop, exit->src,
/* We are scaling up here so
probability does not fit. */
- loop->header->count,
- loop->header->count
- - loop->header->count.apply_probability
- (exit->probability));
+ exit->src->count,
+ exit->src->count - exit->count ());
gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
rescan_loop_exit (new_exit, true, false);
/* Set the probability of new exit to the same of the old one. Fix
- the frequency of the latch block, by scaling it back by
- 1 - exit->probability. */
+ the count of the latch block. */
new_exit->probability = exit->probability;
edge new_nonexit = single_pred_edge (loop->latch);
new_nonexit->probability = exit->probability.invert ();
new_nonexit->flags = EDGE_TRUE_VALUE;
- if (new_nonexit->probability.initialized_p ())
- scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
+ set_edge_probability_and_rescale_others
+ (exit, profile_probability::never ());
+ loop->latch->count = new_nonexit->count ();
edge old_entry = loop_preheader_edge (loop);
edge new_entry = loop_preheader_edge (new_loop);
}
remove_path (exit);
+ /* We will later redirect exit from vectorized loop to new_loop. */
+ loop_preheader_edge (new_loop)->src->count = entry_count;
/* The epilog loop latch executes at most factor - 1 times.
Since the epilog is entered unconditionally it will need to handle
up to factor executions of its body. */
- new_loop->any_upper_bound = 1;
+ new_loop->any_upper_bound = true;
new_loop->nb_iterations_upper_bound = factor - 1;
+ /* We do not really know estimate on number of iterations, since we do not
+ track any estimates modulo unroll factor.
+ Drop estimate from loop_info and scale loop profile.
+ It may be more realistic to scale loop profile to factor / 2 - 1,
+ but vectorizer also uses factor - 1. */
+ new_loop->any_estimate = false;
+ scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
}
else
new_exit = single_dom_exit (loop);
auto_vec<edge> to_remove;
bool ok
- = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
- factor - 1, wont_exit,
- new_exit, &to_remove,
- DLTHE_FLAG_UPDATE_FREQ);
+ = gimple_duplicate_loop_body_to_header_edge
+ (loop, loop_latch_edge (loop), factor - 1, wont_exit,
+ new_exit, &to_remove,
+ DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
gcc_assert (ok);
for (edge e : to_remove)
update_ssa (TODO_update_ssa);
new_exit = single_dom_exit (loop);
+
+ /* gimple_duplicate_loop_body_to_header_edge depending on
+ DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+ or scales it down accordingly.
+ However exit edge probability is kept as original. Fix it if needed
+ and compensate. */
+ profile_probability new_prob
+ = loop_preheader_edge
+ (loop)->count ().probability_in (new_exit->src->count);
+ if (!(new_prob == new_exit->probability))
+ {
+ profile_count old_count = new_exit->src->count - new_exit->count ();
+ set_edge_probability_and_rescale_others (new_exit, new_prob);
+ profile_count new_count = new_exit->src->count - new_exit->count ();
+ scale_dominated_blocks_in_loop (loop, new_exit->src,
+ new_count, old_count);
+ }
if (!single_loop_p)
{
- /* Ensure that the frequencies in the loop match the new estimated
- number of iterations, and change the probability of the new
- exit edge. */
-
- profile_count freq_h = loop->header->count;
- profile_count freq_e = (loop_preheader_edge (loop))->count ();
- if (freq_h.nonzero_p ())
- {
- /* Avoid dropping loop body profile counter to 0 because of zero
- count in loop's preheader. */
- if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
- freq_e = freq_e.force_nonzero ();
- scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
- }
-
- basic_block rest = new_exit->dest;
- new_exit->probability
- = (profile_probability::always () / (new_est_niter + 1));
-
- rest->count += new_exit->count ();
-
- edge new_nonexit = single_pred_edge (loop->latch);
- profile_probability prob = new_nonexit->probability;
- new_nonexit->probability = new_exit->probability.invert ();
- prob = new_nonexit->probability / prob;
- if (prob.initialized_p ())
- scale_bbs_frequencies (&loop->latch, 1, prob);
-
/* Finally create the new counter for number of iterations and add
the new exit instruction. */
tree ctr_before, ctr_after;
gimple_cond_set_rhs (exit_if, exit_bound);
update_stmt (exit_if);
}
- else
- {
- /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
- original profile counts in line with the unroll factor. However,
- the old counts might not have been consistent with the old
- iteration count.
-
- Therefore, if the iteration count is known exactly, make sure that the
- profile counts of the loop header (and any other blocks that might be
- executed in the final iteration) are consistent with the combination
- of (a) the incoming profile count and (b) the new iteration count. */
- profile_count in_count = loop_preheader_edge (loop)->count ();
- profile_count old_header_count = loop->header->count;
- if (in_count.nonzero_p ()
- && old_header_count.nonzero_p ()
- && TREE_CODE (desc->niter) == INTEGER_CST)
- {
- /* The + 1 converts latch counts to iteration counts. */
- profile_count new_header_count = in_count * (new_est_niter + 1);
- basic_block *body = get_loop_body (loop);
- scale_bbs_frequencies_profile_count (body, loop->num_nodes,
- new_header_count,
- old_header_count);
- free (body);
- }
-
- /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
- exit edges and adjusted the loop body's profile counts for the
- new probabilities of the remaining non-exit edges. However,
- the remaining exit edge still has the same probability as it
- did before, even though it is now more likely.
-
- Therefore, all blocks executed after a failed exit test now have
- a profile count that is too high, and the sum of the profile counts
- for the header's incoming edges is greater than the profile count
- of the header itself.
-
- Adjust the profile counts of all code in the loop body after
- the exit test so that the sum of the counts on entry to the
- header agree. */
- profile_count old_latch_count = loop_latch_edge (loop)->count ();
- profile_count new_latch_count = loop->header->count - in_count;
- if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
- scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
- old_latch_count);
-
- /* Set the probability of the exit edge based on NEW_EST_NITER
- (which estimates latch counts rather than iteration counts).
- Update the probabilities of other edges to match.
-
- If the profile counts are large enough to give the required
- precision, the updates above will have made
-
- e->dest->count / e->src->count ~= new e->probability
-
- for every outgoing edge e of NEW_EXIT->src. */
- profile_probability new_exit_prob
- = profile_probability::always () / (new_est_niter + 1);
- change_edge_frequency (new_exit, new_exit_prob);
- }
+ if (loop->any_upper_bound)
+ loop->nb_iterations_upper_bound = wi::udiv_floor
+ (loop->nb_iterations_upper_bound + 1, factor) - 1;
+ if (loop->any_likely_upper_bound)
+ loop->nb_iterations_likely_upper_bound = wi::udiv_floor
+ (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
+ if (loop->any_estimate)
+ loop->nb_iterations_estimate = wi::udiv_floor
+ (loop->nb_iterations_estimate + 1, factor) - 1;
checking_verify_flow_info ();
checking_verify_loop_structure ();