1 /* Support routines for Splitting Paths to loop backedges
2 Copyright (C) 2015-2019 Free Software Foundation, Inc.
3 Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "tree-pass.h"
31 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
38 /* Given LATCH, the latch block in a loop, see if the shape of the
39 path reaching LATCH is suitable for being split by duplication.
40 If so, return the block that will be duplicated into its predecessor
41 paths. Else return NULL. */
44 find_block_to_duplicate_for_splitting_paths (basic_block latch
)
46 /* We should have simple latches at this point. So the latch should
47 have a single successor. This implies the predecessor of the latch
48 likely has the loop exit. And it's that predecessor we're most
49 interested in. To keep things simple, we're going to require that
50 the latch have a single predecessor too. */
51 if (single_succ_p (latch
) && single_pred_p (latch
))
53 basic_block bb
= get_immediate_dominator (CDI_DOMINATORS
, latch
);
54 gcc_assert (single_pred_edge (latch
)->src
== bb
);
56 /* If BB has been marked as not to be duplicated, then honor that
61 gimple
*last
= gsi_stmt (gsi_last_nondebug_bb (bb
));
62 /* The immediate dominator of the latch must end in a conditional. */
63 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
66 /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
67 region. Verify that it is.
69 First, verify that BB has two predecessors (each arm of the
70 IF-THEN-ELSE) and two successors (the latch and exit). */
71 if (EDGE_COUNT (bb
->preds
) == 2 && EDGE_COUNT (bb
->succs
) == 2)
73 /* Now verify that BB's immediate dominator ends in a
74 conditional as well. */
75 basic_block bb_idom
= get_immediate_dominator (CDI_DOMINATORS
, bb
);
76 gimple
*last
= gsi_stmt (gsi_last_nondebug_bb (bb_idom
));
77 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
80 /* And that BB's immediate dominator's successors are the
81 predecessors of BB or BB itself. */
82 if (!(EDGE_PRED (bb
, 0)->src
== bb_idom
83 || find_edge (bb_idom
, EDGE_PRED (bb
, 0)->src
))
84 || !(EDGE_PRED (bb
, 1)->src
== bb_idom
85 || find_edge (bb_idom
, EDGE_PRED (bb
, 1)->src
)))
88 /* And that the predecessors of BB each have a single successor
89 or are BB's immediate domiator itself. */
90 if (!(EDGE_PRED (bb
, 0)->src
== bb_idom
91 || single_succ_p (EDGE_PRED (bb
, 0)->src
))
92 || !(EDGE_PRED (bb
, 1)->src
== bb_idom
93 || single_succ_p (EDGE_PRED (bb
, 1)->src
)))
96 /* So at this point we have a simple diamond for an IF-THEN-ELSE
97 construct starting at BB_IDOM, with a join point at BB. BB
98 pass control outside the loop or to the loop latch.
100 We're going to want to create two duplicates of BB, one for
101 each successor of BB_IDOM. */
108 /* Return the number of non-debug statements in a block. */
110 count_stmts_in_block (basic_block bb
)
112 gimple_stmt_iterator gsi
;
113 unsigned int num_stmts
= 0;
115 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
117 gimple
*stmt
= gsi_stmt (gsi
);
118 if (!is_gimple_debug (stmt
))
124 /* Return TRUE if CODE represents a tree code that is not likely to
125 be easily if-convertable because it likely expands into multiple
126 insns, FALSE otherwise. */
128 poor_ifcvt_candidate_code (enum tree_code code
)
130 return (code
== MIN_EXPR
134 || code
== CALL_EXPR
);
137 /* Return TRUE if BB is a reasonable block to duplicate by examining
138 its size, false otherwise. BB will always be a loop latch block.
142 We do not want to spoil if-conversion if at all possible.
144 Most of the benefit seems to be from eliminating the unconditional
145 jump rather than CSE/DCE opportunities. So favor duplicating
146 small latches. A latch with just a conditional branch is ideal.
148 CSE/DCE opportunties crop up when statements from the predecessors
149 feed statements in the latch and allow statements in the latch to
153 is_feasible_trace (basic_block bb
)
155 basic_block pred1
= EDGE_PRED (bb
, 0)->src
;
156 basic_block pred2
= EDGE_PRED (bb
, 1)->src
;
157 int num_stmts_in_join
= count_stmts_in_block (bb
);
158 int num_stmts_in_pred1
159 = EDGE_COUNT (pred1
->succs
) == 1 ? count_stmts_in_block (pred1
) : 0;
160 int num_stmts_in_pred2
161 = EDGE_COUNT (pred2
->succs
) == 1 ? count_stmts_in_block (pred2
) : 0;
163 /* This is meant to catch cases that are likely opportunities for
164 if-conversion. Essentially we look for the case where
165 BB's predecessors are both single statement blocks where
166 the output of that statement feed the same PHI in BB. */
167 if (num_stmts_in_pred1
== 1 && num_stmts_in_pred2
== 1)
169 gimple
*stmt1
= last_and_only_stmt (pred1
);
170 gimple
*stmt2
= last_and_only_stmt (pred2
);
173 && gimple_code (stmt1
) == GIMPLE_ASSIGN
174 && gimple_code (stmt2
) == GIMPLE_ASSIGN
)
176 enum tree_code code1
= gimple_assign_rhs_code (stmt1
);
177 enum tree_code code2
= gimple_assign_rhs_code (stmt2
);
179 if (!poor_ifcvt_candidate_code (code1
)
180 && !poor_ifcvt_candidate_code (code2
))
182 tree lhs1
= gimple_assign_lhs (stmt1
);
183 tree lhs2
= gimple_assign_lhs (stmt2
);
184 gimple_stmt_iterator gsi
;
185 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
187 gimple
*phi
= gsi_stmt (gsi
);
188 if ((gimple_phi_arg_def (phi
, 0) == lhs1
189 && gimple_phi_arg_def (phi
, 1) == lhs2
)
190 || (gimple_phi_arg_def (phi
, 1) == lhs1
191 && gimple_phi_arg_def (phi
, 0) == lhs2
))
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
195 "Block %d appears to be a join point for "
196 "if-convertable diamond.\n",
205 /* Canonicalize the form. */
206 if (num_stmts_in_pred1
== 0 && num_stmts_in_pred2
== 1)
208 std::swap (pred1
, pred2
);
209 std::swap (num_stmts_in_pred1
, num_stmts_in_pred2
);
212 /* Another variant. This one is half-diamond. */
213 if (num_stmts_in_pred1
== 1 && num_stmts_in_pred2
== 0
214 && dominated_by_p (CDI_DOMINATORS
, pred1
, pred2
))
216 gimple
*stmt1
= last_and_only_stmt (pred1
);
218 /* The only statement in PRED1 must be an assignment that is
219 not a good candidate for if-conversion. This may need some
221 if (stmt1
&& gimple_code (stmt1
) == GIMPLE_ASSIGN
)
223 enum tree_code code1
= gimple_assign_rhs_code (stmt1
);
225 if (!poor_ifcvt_candidate_code (code1
))
227 tree lhs1
= gimple_assign_lhs (stmt1
);
228 tree rhs1
= gimple_assign_rhs1 (stmt1
);
230 gimple_stmt_iterator gsi
;
231 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
233 gimple
*phi
= gsi_stmt (gsi
);
234 if ((gimple_phi_arg_def (phi
, 0) == lhs1
235 && gimple_phi_arg_def (phi
, 1) == rhs1
)
236 || (gimple_phi_arg_def (phi
, 1) == lhs1
237 && gimple_phi_arg_def (phi
, 0) == rhs1
))
239 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
241 "Block %d appears to be a join point for "
242 "if-convertable half-diamond.\n",
251 /* If the joiner has no PHIs with useful uses there is zero chance
252 of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
253 bool found_useful_phi
= false;
254 for (gphi_iterator si
= gsi_start_phis (bb
); ! gsi_end_p (si
);
257 gphi
*phi
= si
.phi ();
259 imm_use_iterator iter
;
260 FOR_EACH_IMM_USE_FAST (use_p
, iter
, gimple_phi_result (phi
))
262 gimple
*stmt
= USE_STMT (use_p
);
263 if (is_gimple_debug (stmt
))
265 /* If there's a use in the joiner this might be a CSE/DCE
266 opportunity, but not if the use is in a conditional
267 which makes this a likely if-conversion candidate. */
268 if (gimple_bb (stmt
) == bb
269 && (!is_gimple_assign (stmt
)
270 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
273 found_useful_phi
= true;
276 /* If the use is on a loop header PHI and on one path the
277 value is unchanged this might expose a jump threading
279 if (gimple_code (stmt
) == GIMPLE_PHI
280 && gimple_bb (stmt
) == bb
->loop_father
->header
281 /* But for memory the PHI alone isn't good enough. */
282 && ! virtual_operand_p (gimple_phi_result (stmt
)))
284 bool found_unchanged_path
= false;
285 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
286 if (gimple_phi_arg_def (phi
, i
) == gimple_phi_result (stmt
))
288 found_unchanged_path
= true;
291 /* If we found an unchanged path this can only be a threading
292 opportunity if we have uses of the loop header PHI result
293 in a stmt dominating the merge block. Otherwise the
294 splitting may prevent if-conversion. */
295 if (found_unchanged_path
)
297 use_operand_p use2_p
;
298 imm_use_iterator iter2
;
299 FOR_EACH_IMM_USE_FAST (use2_p
, iter2
, gimple_phi_result (stmt
))
301 gimple
*use_stmt
= USE_STMT (use2_p
);
302 if (is_gimple_debug (use_stmt
))
304 basic_block use_bb
= gimple_bb (use_stmt
);
306 && dominated_by_p (CDI_DOMINATORS
, bb
, use_bb
))
308 if (gcond
*cond
= dyn_cast
<gcond
*> (use_stmt
))
309 if (gimple_cond_code (cond
) == EQ_EXPR
310 || gimple_cond_code (cond
) == NE_EXPR
)
311 found_useful_phi
= true;
316 if (found_useful_phi
)
320 if (found_useful_phi
)
323 /* There is one exception namely a controlling condition we can propagate
324 an equivalence from to the joiner. */
325 bool found_cprop_opportunity
= false;
326 basic_block dom
= get_immediate_dominator (CDI_DOMINATORS
, bb
);
327 gcond
*cond
= as_a
<gcond
*> (last_stmt (dom
));
328 if (gimple_cond_code (cond
) == EQ_EXPR
329 || gimple_cond_code (cond
) == NE_EXPR
)
330 for (unsigned i
= 0; i
< 2; ++i
)
332 tree op
= gimple_op (cond
, i
);
333 if (TREE_CODE (op
) == SSA_NAME
)
336 imm_use_iterator iter
;
337 FOR_EACH_IMM_USE_FAST (use_p
, iter
, op
)
339 if (is_gimple_debug (USE_STMT (use_p
)))
341 if (gimple_bb (USE_STMT (use_p
)) == bb
)
343 found_cprop_opportunity
= true;
348 if (found_cprop_opportunity
)
352 if (! found_useful_phi
&& ! found_cprop_opportunity
)
354 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
356 "Block %d is a join that does not expose CSE/DCE/jump-thread "
357 "opportunities when duplicated.\n",
362 /* We may want something here which looks at dataflow and tries
363 to guess if duplication of BB is likely to result in simplification
364 of instructions in BB in either the original or the duplicate. */
366 /* Upper Hard limit on the number statements to copy. */
367 if (num_stmts_in_join
368 >= param_max_jump_thread_duplication_stmts
)
374 /* If the immediate dominator of the latch of the loop is
375 block with conditional branch, then the loop latch is
376 duplicated to its predecessors path preserving the SSA
379 CFG before transformation.
399 Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
400 and wire things up so they look like this:
423 Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
424 enables CSE, DCE and other optimizations to occur on a larger block
430 bool changed
= false;
433 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
434 initialize_original_copy_tables ();
435 calculate_dominance_info (CDI_DOMINATORS
);
437 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
439 /* Only split paths if we are optimizing this loop for speed. */
440 if (!optimize_loop_for_speed_p (loop
))
443 /* See if there is a block that we can duplicate to split the
444 path to the loop latch. */
446 = find_block_to_duplicate_for_splitting_paths (loop
->latch
);
448 /* BB is the merge point for an IF-THEN-ELSE we want to transform.
450 Essentially we want to create a duplicate of bb and redirect the
451 first predecessor of BB to the duplicate (leaving the second
452 predecessor as is. This will split the path leading to the latch
453 re-using BB to avoid useless copying. */
454 if (bb
&& is_feasible_trace (bb
))
456 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
458 "Duplicating join block %d into predecessor paths\n",
460 basic_block pred0
= EDGE_PRED (bb
, 0)->src
;
461 if (EDGE_COUNT (pred0
->succs
) != 1)
462 pred0
= EDGE_PRED (bb
, 1)->src
;
463 transform_duplicate (pred0
, bb
);
466 /* If BB has an outgoing edge marked as IRREDUCIBLE, then
467 duplicating BB may result in an irreducible region turning
470 Long term we might want to hook this into the block
471 duplication code, but as we've seen with similar changes
472 for edge removal, that can be somewhat risky. */
473 if (EDGE_SUCC (bb
, 0)->flags
& EDGE_IRREDUCIBLE_LOOP
474 || EDGE_SUCC (bb
, 1)->flags
& EDGE_IRREDUCIBLE_LOOP
)
476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
478 "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
479 "Scheduling loop fixups.\n",
481 loops_state_set (LOOPS_NEED_FIXUP
);
486 loop_optimizer_finalize ();
487 free_original_copy_tables ();
491 /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
492 paths where split, otherwise return zero. */
495 execute_split_paths ()
497 /* If we don't have at least 2 real blocks and backedges in the
498 CFG, then there's no point in trying to perform path splitting. */
499 if (n_basic_blocks_for_fn (cfun
) <= NUM_FIXED_BLOCKS
+ 1
500 || !mark_dfs_back_edges ())
503 bool changed
= split_paths();
505 free_dominance_info (CDI_DOMINATORS
);
507 return changed
? TODO_cleanup_cfg
: 0;
513 return flag_split_paths
;
518 const pass_data pass_data_split_paths
=
520 GIMPLE_PASS
, /* type */
521 "split-paths", /* name */
522 OPTGROUP_NONE
, /* optinfo_flags */
523 TV_SPLIT_PATHS
, /* tv_id */
524 PROP_ssa
, /* properties_required */
525 0, /* properties_provided */
526 0, /* properties_destroyed */
527 0, /* todo_flags_start */
528 TODO_update_ssa
, /* todo_flags_finish */
531 class pass_split_paths
: public gimple_opt_pass
534 pass_split_paths (gcc::context
*ctxt
)
535 : gimple_opt_pass (pass_data_split_paths
, ctxt
)
537 /* opt_pass methods: */
538 opt_pass
* clone () { return new pass_split_paths (m_ctxt
); }
539 virtual bool gate (function
*) { return gate_split_paths (); }
540 virtual unsigned int execute (function
*) { return execute_split_paths (); }
542 }; // class pass_split_paths
547 make_pass_split_paths (gcc::context
*ctxt
)
549 return new pass_split_paths (ctxt
);