--- /dev/null
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#define N 1024
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+unsigned test4(unsigned x) /* Loop over N elements combining stores, an early return and a running sum.  */
+{
+ unsigned ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+ vect_b[i] = x + i; /* Store executed before the early-exit test.  */
+ if (vect_a[i] > x)
+ {
+ ret *= vect_a[i]; /* No effect on the result: ret is not returned on this path.  */
+ return vect_a[i]; /* Early exit out of the loop.  */
+ }
+ vect_a[i] = x; /* Store executed only when the exit is not taken.  */
+ ret += vect_a[i] + vect_b[i]; /* Sum returned when the loop runs to completion.  */
+ }
+ return ret;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#ifndef N
+#define N 800
+#endif
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+unsigned test4(unsigned x) /* Loop over N elements with a store on each side of a break.  */
+{
+ unsigned ret = 0; /* Never modified below, so the function always returns 0.  */
+ for (int i = 0; i < N; i++)
+ {
+ vect_b[i] = x + i; /* Store executed before the early-exit test.  */
+ if (vect_a[i]*2 != x)
+ break; /* Early exit out of the loop.  */
+ vect_a[i] = x; /* Store executed only when the exit is not taken.  */
+
+ }
+ return ret;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#ifndef N
+#define N 800
+#endif
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+unsigned test4(unsigned x) /* Same shape as a single-break loop, but handling elements i and i+1 per iteration.  */
+{
+ unsigned ret = 0; /* Never modified below, so the function always returns 0.  */
+ for (int i = 0; i < N; i+=2) /* Two elements per iteration.  */
+ {
+ vect_b[i] = x + i;
+ vect_b[i+1] = x + i+1;
+ if (vect_a[i]*2 != x)
+ break; /* Early exit for element i.  */
+ if (vect_a[i+1]*2 != x)
+ break; /* Early exit for element i+1.  */
+ vect_a[i] = x; /* Stores executed only when neither exit is taken.  */
+ vect_a[i+1] = x;
+
+ }
+ return ret;
+}
unsigned i, j;
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
{
+ if (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance)) != vect_internal_def)
+ continue;
+
stmt_vec_info vinfo;
vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
"Analyzing vectorizable constructor: %G\n",
root_stmt_infos[0]->stmt);
}
+ else if (kind == slp_inst_kind_gcond)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Analyzing vectorizable control flow: %G",
+ root_stmt_infos[0]->stmt);
+ }
if (dump_enabled_p ())
{
bst_map, NULL, force_single_lane);
}
}
+
+ /* Find SLP sequences starting from gconds. */
+ for (auto cond : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+ {
+ auto cond_info = loop_vinfo->lookup_stmt (cond);
+
+ cond_info = vect_stmt_to_vectorize (cond_info);
+ vec<stmt_vec_info> roots = vNULL;
+ roots.safe_push (cond_info);
+ gimple *stmt = STMT_VINFO_STMT (cond_info);
+ tree args0 = gimple_cond_lhs (stmt);
+ tree args1 = gimple_cond_rhs (stmt);
+
+ /* These should be enforced by cond lowering. */
+ gcc_assert (gimple_cond_code (stmt) == NE_EXPR);
+ gcc_assert (zerop (args1));
+
+ /* An argument without a loop def will be codegened from vectorizing the
+ root gcond itself. As such we don't need to try to build an SLP tree
+ from it. It's highly likely that the resulting SLP tree here, if both
+ arguments have a def, will be incompatible, but we rely on it being split
+ later on. */
+ if (auto varg = loop_vinfo->lookup_def (args0))
+ {
+ vec<stmt_vec_info> stmts;
+ vec<tree> remain = vNULL;
+ stmts.create (1);
+ stmts.quick_push (vect_stmt_to_vectorize (varg));
+
+ vect_build_slp_instance (vinfo, slp_inst_kind_gcond,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, NULL, force_single_lane);
+ }
+ else
+ {
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ vec<tree> ops;
+ ops.create (1);
+ ops.quick_push (args0);
+ slp_tree invnode = vect_create_new_slp_node (ops);
+ SLP_TREE_DEF_TYPE (invnode) = vect_external_def;
+ SLP_INSTANCE_TREE (new_instance) = invnode;
+ SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = 1;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = roots;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+ SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_gcond;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
+ vinfo->slp_instances.safe_push (new_instance);
+ }
+ }
+
+ /* Find and create slp instances for inductions that have been forced
+ live due to early break. */
+ edge latch_e = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ for (auto stmt_info : LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo))
+ {
+ vec<stmt_vec_info> stmts;
+ vec<stmt_vec_info> roots = vNULL;
+ vec<tree> remain = vNULL;
+ gphi *lc_phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
+ tree def = gimple_phi_arg_def_from_edge (lc_phi, latch_e);
+ stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
+ stmts.create (1);
+ stmts.quick_push (vect_stmt_to_vectorize (lc_info));
+ vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, NULL, force_single_lane);
+ }
}
hash_set<slp_tree> visited_patterns;
}
}
}
- /* No def means this is a loo_vect sink. */
- if (!any_def)
+ /* No def means this is a loop_vect sink. Gimple conditionals also don't have a
+ def but shouldn't be considered sinks. */
+ if (!any_def && STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
(SLP_INSTANCE_TREE (instance))))))
/* Check we can vectorize the reduction. */
|| (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc
- && !vectorizable_bb_reduc_epilogue (instance, &cost_vec)))
+ && !vectorizable_bb_reduc_epilogue (instance, &cost_vec))
+ /* Check we can vectorize the gcond. */
+ || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_gcond
+ && !vectorizable_early_exit (vinfo,
+ SLP_INSTANCE_ROOT_STMTS (instance)[0],
+ NULL, NULL,
+ SLP_INSTANCE_TREE (instance),
+ &cost_vec)))
{
cost_vec.release ();
slp_tree node = SLP_INSTANCE_TREE (instance);
!gsi_end_p (gsi); gsi_next (&gsi))
{
gassign *assign = dyn_cast<gassign *> (gsi_stmt (gsi));
+ /* This can be used to start SLP discovery for BB early breaks when we get
+ that far. */
if (!assign)
continue;
/* Vectorize the instance root. */
void
-vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
+vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance instance)
{
gassign *rstmt = NULL;
update_stmt (gsi_stmt (rgsi));
return;
}
+ else if (instance->kind == slp_inst_kind_gcond)
+ {
+ /* Only support a single root for now as we can't codegen CFG yet and so we
+ can't support lane > 1 at this time. */
+ gcc_assert (instance->root_stmts.length () == 1);
+ auto root_stmt_info = instance->root_stmts[0];
+ auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
+ gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
+ gimple *vec_stmt = NULL;
+ gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
+ bool res = vectorizable_early_exit (vinfo, root_stmt_info, &rgsi,
+ &vec_stmt, node, NULL);
+ gcc_assert (res);
+ return;
+ }
else
gcc_unreachable ();
vect_schedule_scc (vinfo, node, instance, scc_info, maxdfs, stack);
if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ())
- vectorize_slp_instance_root_stmt (node, instance);
+ vectorize_slp_instance_root_stmt (vinfo, node, instance);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
dump_printf_loc (MSG_NOTE, vect_location,
"vec_stmt_relevant_p: induction forced for "
"early break.\n");
+ LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
*live_p = true;
}
/* Check to see if the current early break given in STMT_INFO is valid for
vectorization. */
-static bool
+bool
vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi, gimple **vec_stmt,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
slp_tree slp_op0;
tree op0;
enum vect_def_type dt0;
- if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
- &vectype))
+
+ /* Early break gcond kind SLP trees can be root only and have no children,
+ for instance in the case where the argument is an external. If that's
+ the case there is no operand to analyse use of. */
+ if ((!slp_node || !SLP_TREE_CHILDREN (slp_node).is_empty ())
+ && !vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
+ &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
+ /* For SLP we don't want to use the type of the operands of the SLP node.
+ When vectorizing using SLP, slp_node will be the children of the gcond and
+ we want to use the type of the direct children which, since the gcond is
+ root, will be the current node rather than a child node as
+ vect_is_simple_use assumes. */
+ if (slp_node)
+ vectype = SLP_TREE_VECTYPE (slp_node);
+
if (!vectype)
return false;
machine_mode mode = TYPE_MODE (vectype);
- int ncopies;
+ int ncopies, vec_num;
if (slp_node)
- ncopies = 1;
+ {
+ ncopies = 1;
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ }
else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ {
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ vec_num = 1;
+ }
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
{
if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
+ vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num,
+ vectype, 1);
else
- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype, NULL);
}
return true;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
- if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
- vec_stmt, slp_node, cost_vec))
- gcc_unreachable ();
+ /* For SLP we don't do codegen of the body starting from the gcond, the gconds are
+ roots and so by the time we get to them we have already codegened the SLP tree
+ and so we shouldn't try to do so again. The arguments have already been
+ vectorized. It's not very clean to do this here, but the masking code below is
+ complex and this keeps it all in one place to ease fixes and backports. Once we
+ drop the non-SLP loop vect or split vectorizable_* this can be simplified. */
+ if (!slp_node)
+ {
+ if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
+ vec_stmt, slp_node, cost_vec))
+ gcc_unreachable ();
+ }
gimple *stmt = STMT_VINFO_STMT (stmt_info);
basic_block cond_bb = gimple_bb (stmt);
for (unsigned i = 0; i < stmts.length (); i++)
{
tree stmt_mask
- = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
- i);
+ = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies * vec_num,
+ vectype, i);
stmt_mask
= prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
stmts[i], &cond_gsi);
for (unsigned i = 0; i < stmts.length (); i++)
{
tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
- lens, ncopies, vectype,
- stmts[i], i, 1);
+ lens, ncopies * vec_num,
+ vectype, stmts[i], i, 1);
workset.quick_push (len_mask);
}
slp_inst_kind_reduc_group,
slp_inst_kind_reduc_chain,
slp_inst_kind_bb_reduc,
- slp_inst_kind_ctor
+ slp_inst_kind_ctor,
+ slp_inst_kind_gcond
};
/* SLP instance is a sequence of stmts in a loop that can be packed into
/* Statements whose VUSES need updating if early break vectorization is to
happen. */
auto_vec<gimple*> early_break_vuses;
+
+ /* Statements that need to be live for early break vectorization but may
+ not have an LC PHI node materialized yet in the exits. */
+ auto_vec<stmt_vec_info> early_break_live_ivs;
} *loop_vec_info;
/* Access Functions. */
#define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
(single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
+#define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L) \
+ (L)->early_break_live_ivs
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
stmt_vector_for_cost *);
extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
gimple **, slp_tree, stmt_vector_for_cost *);
+extern bool vectorizable_early_exit (vec_info *, stmt_vec_info,
+ gimple_stmt_iterator *, gimple **,
+ slp_tree, stmt_vector_for_cost *);
extern bool vect_emulated_vector_p (tree);
extern bool vect_can_vectorize_without_simd_p (tree_code);
extern bool vect_can_vectorize_without_simd_p (code_helper);