/* Support routines for Splitting Paths to loop backedges
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2020 Free Software Foundation, Inc.
Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
This file is part of GCC.
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
+#include "tree-cfg.h"
#include "cfganal.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tracer.h"
#include "predict.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
/* Given LATCH, the latch block in a loop, see if the shape of the
path reaching LATCH is suitable for being split by duplication.
return NULL;
/* And that BB's immediate dominator's successors are the
- predecessors of BB. */
- if (!find_edge (bb_idom, EDGE_PRED (bb, 0)->src)
- || !find_edge (bb_idom, EDGE_PRED (bb, 1)->src))
+ predecessors of BB or BB itself. */
+ if (!(EDGE_PRED (bb, 0)->src == bb_idom
+ || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
+ || !(EDGE_PRED (bb, 1)->src == bb_idom
+ || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
+ return NULL;
+
+ /* And that the predecessors of BB each have a single successor
+ or are BB's immediate dominator itself. */
+ if (!(EDGE_PRED (bb, 0)->src == bb_idom
+ || single_succ_p (EDGE_PRED (bb, 0)->src))
+ || !(EDGE_PRED (bb, 1)->src == bb_idom
+ || single_succ_p (EDGE_PRED (bb, 1)->src)))
return NULL;
/* So at this point we have a simple diamond for an IF-THEN-ELSE
return NULL;
}
+/* Return the number of non-debug statements in basic block BB. */
+static unsigned int
+count_stmts_in_block (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ unsigned int num_stmts = 0;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt))
+ num_stmts++;
+ }
+ return num_stmts;
+}
+
+/* Return TRUE if CODE represents a tree code that is not likely to
+ be easily if-convertible because it likely expands into multiple
+ insns, FALSE otherwise. */
+static bool
+poor_ifcvt_candidate_code (enum tree_code code)
+{
+ return (code == MIN_EXPR
+ || code == MAX_EXPR
+ || code == ABS_EXPR
+ || code == COND_EXPR
+ || code == CALL_EXPR);
+}
+
/* Return TRUE if BB is a reasonable block to duplicate by examining
its size, false otherwise. BB will always be a loop latch block.
- Should this use the same tests as we do for jump threading? */
+ Things to consider:
+
+ We do not want to spoil if-conversion if at all possible.
+
+ Most of the benefit seems to be from eliminating the unconditional
+ jump rather than CSE/DCE opportunities. So favor duplicating
+ small latches. A latch with just a conditional branch is ideal.
+
+ CSE/DCE opportunities crop up when statements from the predecessors
+ feed statements in the latch and allow statements in the latch to
+ simplify. */
static bool
is_feasible_trace (basic_block bb)
{
- int num_stmt = 0;
- gimple_stmt_iterator gsi;
+ basic_block pred1 = EDGE_PRED (bb, 0)->src;
+ basic_block pred2 = EDGE_PRED (bb, 1)->src;
+ int num_stmts_in_join = count_stmts_in_block (bb);
+ int num_stmts_in_pred1
+ = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
+ int num_stmts_in_pred2
+ = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;
+
+ /* This is meant to catch cases that are likely opportunities for
+ if-conversion. Essentially we look for the case where
+ BB's predecessors are both single statement blocks where
+ the output of that statement feeds the same PHI in BB. */
+ if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 1)
+ {
+ gimple *stmt1 = last_and_only_stmt (pred1);
+ gimple *stmt2 = last_and_only_stmt (pred2);
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (stmt1 && stmt2
+ && gimple_code (stmt1) == GIMPLE_ASSIGN
+ && gimple_code (stmt2) == GIMPLE_ASSIGN)
+ {
+ enum tree_code code1 = gimple_assign_rhs_code (stmt1);
+ enum tree_code code2 = gimple_assign_rhs_code (stmt2);
+
+ if (!poor_ifcvt_candidate_code (code1)
+ && !poor_ifcvt_candidate_code (code2))
+ {
+ tree lhs1 = gimple_assign_lhs (stmt1);
+ tree lhs2 = gimple_assign_lhs (stmt2);
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *phi = gsi_stmt (gsi);
+ if ((gimple_phi_arg_def (phi, 0) == lhs1
+ && gimple_phi_arg_def (phi, 1) == lhs2)
+ || (gimple_phi_arg_def (phi, 1) == lhs1
+ && gimple_phi_arg_def (phi, 0) == lhs2))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Block %d appears to be a join point for "
+ "if-convertable diamond.\n",
+ bb->index);
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ /* Canonicalize the form. */
+ if (num_stmts_in_pred1 == 0 && num_stmts_in_pred2 == 1)
{
- gimple *stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt))
- num_stmt++;
+ std::swap (pred1, pred2);
+ std::swap (num_stmts_in_pred1, num_stmts_in_pred2);
+ }
+
+ /* Another variant. This one is half-diamond. */
+ if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 0
+ && dominated_by_p (CDI_DOMINATORS, pred1, pred2))
+ {
+ gimple *stmt1 = last_and_only_stmt (pred1);
+
+ /* The only statement in PRED1 must be an assignment that is
+ not a good candidate for if-conversion. This may need some
+ generalization. */
+ if (stmt1 && gimple_code (stmt1) == GIMPLE_ASSIGN)
+ {
+ enum tree_code code1 = gimple_assign_rhs_code (stmt1);
+
+ if (!poor_ifcvt_candidate_code (code1))
+ {
+ tree lhs1 = gimple_assign_lhs (stmt1);
+ tree rhs1 = gimple_assign_rhs1 (stmt1);
+
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *phi = gsi_stmt (gsi);
+ if ((gimple_phi_arg_def (phi, 0) == lhs1
+ && gimple_phi_arg_def (phi, 1) == rhs1)
+ || (gimple_phi_arg_def (phi, 1) == lhs1
+ && gimple_phi_arg_def (phi, 0) == rhs1))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Block %d appears to be a join point for "
+ "if-convertable half-diamond.\n",
+ bb->index);
+ return false;
+ }
+ }
+ }
+ }
}
- /* We may want to limit how many statements we copy. */
- if (num_stmt > 1)
- return true;
+ /* If the joiner has no PHIs with useful uses there is zero chance
+ of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
+ bool found_useful_phi = false;
+ for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
+ gsi_next (&si))
+ {
+ gphi *phi = si.phi ();
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
+ {
+ gimple *stmt = USE_STMT (use_p);
+ if (is_gimple_debug (stmt))
+ continue;
+ /* If there's a use in the joiner this might be a CSE/DCE
+ opportunity, but not if the use is in a conditional
+ which makes this a likely if-conversion candidate. */
+ if (gimple_bb (stmt) == bb
+ && (!is_gimple_assign (stmt)
+ || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ != tcc_comparison)))
+ {
+ found_useful_phi = true;
+ break;
+ }
+ /* If the use is on a loop header PHI and on one path the
+ value is unchanged this might expose a jump threading
+ opportunity. */
+ if (gimple_code (stmt) == GIMPLE_PHI
+ && gimple_bb (stmt) == bb->loop_father->header
+ /* But for memory the PHI alone isn't good enough. */
+ && ! virtual_operand_p (gimple_phi_result (stmt)))
+ {
+ bool found_unchanged_path = false;
+ for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+ if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
+ {
+ found_unchanged_path = true;
+ break;
+ }
+ /* If we found an unchanged path this can only be a threading
+ opportunity if we have uses of the loop header PHI result
+ in a stmt dominating the merge block. Otherwise the
+ splitting may prevent if-conversion. */
+ if (found_unchanged_path)
+ {
+ use_operand_p use2_p;
+ imm_use_iterator iter2;
+ FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
+ {
+ gimple *use_stmt = USE_STMT (use2_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ basic_block use_bb = gimple_bb (use_stmt);
+ if (use_bb != bb
+ && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
+ {
+ if (gcond *cond = dyn_cast <gcond *> (use_stmt))
+ if (gimple_cond_code (cond) == EQ_EXPR
+ || gimple_cond_code (cond) == NE_EXPR)
+ found_useful_phi = true;
+ break;
+ }
+ }
+ }
+ if (found_useful_phi)
+ break;
+ }
+ }
+ if (found_useful_phi)
+ break;
+ }
+ /* There is one exception namely a controlling condition we can propagate
+ an equivalence from to the joiner. */
+ bool found_cprop_opportunity = false;
+ basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gcond *cond = as_a <gcond *> (last_stmt (dom));
+ if (gimple_cond_code (cond) == EQ_EXPR
+ || gimple_cond_code (cond) == NE_EXPR)
+ for (unsigned i = 0; i < 2; ++i)
+ {
+ tree op = gimple_op (cond, i);
+ if (TREE_CODE (op) == SSA_NAME)
+ {
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, op)
+ {
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+ if (gimple_bb (USE_STMT (use_p)) == bb)
+ {
+ found_cprop_opportunity = true;
+ break;
+ }
+ }
+ }
+ if (found_cprop_opportunity)
+ break;
+ }
+
+ if (! found_useful_phi && ! found_cprop_opportunity)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Block %d is a join that does not expose CSE/DCE/jump-thread "
+ "opportunities when duplicated.\n",
+ bb->index);
+ return false;
+ }
+
+ /* We may want something here which looks at dataflow and tries
+ to guess if duplication of BB is likely to result in simplification
+ of instructions in BB in either the original or the duplicate. */
- return false;
+ /* Hard upper limit on the number of statements to copy. */
+ if (num_stmts_in_join
+ >= param_max_jump_thread_duplication_stmts)
+ return false;
+
+ return true;
}
/* If the immediate dominator of the latch of the loop is
"Duplicating join block %d into predecessor paths\n",
bb->index);
basic_block pred0 = EDGE_PRED (bb, 0)->src;
+ if (EDGE_COUNT (pred0->succs) != 1)
+ pred0 = EDGE_PRED (bb, 1)->src;
transform_duplicate (pred0, bb);
changed = true;
+
+ /* If BB has an outgoing edge marked as IRREDUCIBLE, then
+ duplicating BB may result in an irreducible region turning
+ into a natural loop.
+
+ Long term we might want to hook this into the block
+ duplication code, but as we've seen with similar changes
+ for edge removal, that can be somewhat risky. */
+ if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
+ || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
+ "Scheduling loop fixups.\n",
+ bb->index);
+ loops_state_set (LOOPS_NEED_FIXUP);
+ }
}
}