static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
unsigned *);
static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
- gphi **, bool *, bool);
+ gphi **);
/* Function vect_is_simple_iv_evolution.
slp analyses or not. */
static void
-vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
- bool slp)
+vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
{
basic_block bb = loop->header;
auto_vec<stmt_vec_info, 64> worklist;
&& STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
gphi *double_reduc;
- bool reduc_chain;
stmt_vec_info reduc_stmt_info
- = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
- &reduc_chain, slp);
+ = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc);
if (reduc_stmt_info && double_reduc)
{
- bool inner_chain;
stmt_vec_info inner_phi_info
= loop_vinfo->lookup_stmt (double_reduc);
/* ??? Pass down flag we're the inner loop of a double reduc. */
stmt_vec_info inner_reduc_info
- = vect_is_simple_reduction (loop_vinfo, inner_phi_info,
- NULL, &inner_chain, slp);
+ = vect_is_simple_reduction (loop_vinfo, inner_phi_info, NULL);
if (inner_reduc_info)
{
STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
- /* Store the reduction cycles for possible vectorization in
- loop-aware SLP if it was not detected as reduction
- chain. */
- if (! reduc_chain)
- LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
- (reduc_stmt_info);
+ LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt_info);
}
}
else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
a[i] = i; */
static void
-vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp);
+ vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
/* When vectorizing an outer-loop, the inner-loop is executed sequentially.
Reductions in such inner-loop therefore have different properties than
current checks are too strict. */
if (loop->inner)
- vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp);
-}
-
-/* Transfer group and reduction information from STMT_INFO to its
- pattern stmt. */
-
-static void
-vect_fixup_reduc_chain (stmt_vec_info stmt_info)
-{
- stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
- stmt_vec_info stmtp;
- gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
- && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
- REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
- do
- {
- stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
- gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
- == STMT_VINFO_DEF_TYPE (stmt_info));
- REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
- stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
- if (stmt_info)
- REDUC_GROUP_NEXT_ELEMENT (stmtp)
- = STMT_VINFO_RELATED_STMT (stmt_info);
- }
- while (stmt_info);
-}
-
-/* Fixup scalar cycles that now have their stmts detected as patterns. */
-
-static void
-vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
-{
- stmt_vec_info first;
- unsigned i;
-
- FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
- while (next)
- {
- if ((STMT_VINFO_IN_PATTERN_P (next)
- != STMT_VINFO_IN_PATTERN_P (first))
- || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
- break;
- next = REDUC_GROUP_NEXT_ELEMENT (next);
- }
- /* If all reduction chain members are well-formed patterns adjust
- the group to group the pattern stmts instead. */
- if (! next
- && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
- {
- if (STMT_VINFO_IN_PATTERN_P (first))
- {
- vect_fixup_reduc_chain (first);
- LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
- = STMT_VINFO_RELATED_STMT (first);
- }
- }
- /* If not all stmt in the chain are patterns or if we failed
- to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
- it as regular reduction instead. */
- else
- {
- stmt_vec_info vinfo = first;
- stmt_vec_info last = NULL;
- while (vinfo)
- {
- next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
- REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
- REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- last = vinfo;
- vinfo = next;
- }
- STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
- = vect_internal_def;
- loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
- LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
- --i;
- }
- }
+ vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_get_loop_niters.
/* Classify all cross-iteration scalar data-flow cycles.
Cross-iteration cycles caused by virtual phis are analyzed separately. */
- vect_analyze_scalar_cycles (loop_vinfo, !force_single_lane);
+ vect_analyze_scalar_cycles (loop_vinfo);
vect_pattern_recog (loop_vinfo);
- vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
-
/* Analyze the access patterns of the data-refs in the loop (consecutive,
complex, etc.). FORNOW: Only handle consecutive access pattern. */
if (applying_suggested_uf)
return ok;
- /* If there are reduction chains re-trying will fail anyway. */
- if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
- return ok;
-
/* Likewise if the grouped loads or stores in the SLP cannot be handled
via interleaving or lane instructions. */
slp_instance instance;
static stmt_vec_info
vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
- gphi **double_reduc, bool *reduc_chain_p, bool slp)
+ gphi **double_reduc)
{
gphi *phi = as_a <gphi *> (phi_info->stmt);
gimple *phi_use_stmt = NULL;
bool inner_loop_of_double_reduc = double_reduc == NULL;
if (double_reduc)
*double_reduc = NULL;
- *reduc_chain_p = false;
STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
tree phi_name = PHI_RESULT (phi);
if (code == COND_EXPR && !nested_in_vect_loop)
STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
- /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
- reduction chain for which the additional restriction is that
- all operations in the chain are the same. */
- auto_vec<stmt_vec_info, 8> reduc_chain;
+ /* Fill in STMT_VINFO_REDUC_IDX. */
unsigned i;
- bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
for (i = path.length () - 1; i >= 1; --i)
{
gimple *stmt = USE_STMT (path[i].second);
STMT_VINFO_REDUC_IDX (stmt_info)
= path[i].second->use - gimple_call_arg_ptr (call, 0);
}
- bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code)
- && (i == 1 || i == path.length () - 1));
- if ((op.code != code && !leading_conversion)
- /* We can only handle the final value in epilogue
- generation for reduction chains. */
- || (i != 1 && !has_single_use (gimple_get_lhs (stmt))))
- is_slp_reduc = false;
- /* For reduction chains we support a trailing/leading
- conversions. We do not store those in the actual chain. */
- if (leading_conversion)
- continue;
- reduc_chain.safe_push (stmt_info);
}
- if (slp && is_slp_reduc && reduc_chain.length () > 1)
- {
- for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
- {
- REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
- REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
- }
- REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
- REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
-
- /* Save the chain for further analysis in SLP detection. */
- LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
- REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
-
- *reduc_chain_p = true;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "reduction: detected reduction chain\n");
- }
- else if (dump_enabled_p ())
+ if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"reduction: detected reduction\n");
# b1 = phi <b2, b0>
a2 = operation (a1)
b2 = operation (b1) */
- const bool slp_reduc
- = SLP_INSTANCE_KIND (slp_node_instance) != slp_inst_kind_reduc_chain;
+ const bool slp_reduc = !reduc_info->is_reduc_chain;
tree induction_index = NULL_TREE;
unsigned int group_size = SLP_TREE_LANES (slp_node);
bool single_defuse_cycle = false;
tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
tree cond_reduc_val = NULL_TREE;
- const bool reduc_chain
- = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
/* Make sure it was already recognized as a reduction computation. */
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
double_reduc = true;
}
+ const bool reduc_chain = reduc_info->is_reduc_chain;
slp_node_instance->reduc_phis = slp_node;
/* ??? We're leaving slp_node to point to the PHIs, we only
need it to get at the number of vector stmts which wasn't
/* Verify following REDUC_IDX from the latch def leads us back to the PHI
and compute the reduction chain length. Discover the real
- reduction operation stmt on the way (stmt_info and slp_for_stmt_info). */
- tree reduc_def
- = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_latch_edge (loop));
+ reduction operation stmt on the way (slp_for_stmt_info). */
unsigned reduc_chain_length = 0;
- bool only_slp_reduc_chain = true;
stmt_info = NULL;
slp_tree slp_for_stmt_info = NULL;
slp_tree vdef_slp = slp_node_instance->root;
- /* For double-reductions we start SLP analysis at the inner loop LC PHI
- which is the def of the outer loop live stmt. */
- if (double_reduc)
- vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0];
- while (reduc_def != PHI_RESULT (reduc_def_phi))
+ while (vdef_slp != slp_node)
{
- stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
- stmt_vec_info vdef = vect_stmt_to_vectorize (def);
- int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
- if (STMT_VINFO_REDUC_IDX (vdef) == -1
- || SLP_TREE_REDUC_IDX (vdef_slp) == -1)
+ int reduc_idx = SLP_TREE_REDUC_IDX (vdef_slp);
+ if (reduc_idx == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduction chain broken by patterns.\n");
return false;
}
- if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
- only_slp_reduc_chain = false;
+ stmt_vec_info vdef = SLP_TREE_REPRESENTATIVE (vdef_slp);
+ if (is_a <gphi *> (vdef->stmt))
+ {
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+ /* Do not count PHIs towards the chain length. */
+ continue;
+ }
gimple_match_op op;
if (!gimple_extract_op (vdef->stmt, &op))
{
else
{
/* First non-conversion stmt. */
- if (!stmt_info)
- {
- stmt_info = vdef;
- slp_for_stmt_info = vdef_slp;
- }
+ if (!slp_for_stmt_info)
+ slp_for_stmt_info = vdef_slp;
if (lane_reducing_op_p (op.code))
{
}
else if (!vectype_in)
vectype_in = SLP_TREE_VECTYPE (slp_node);
- if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
- {
- gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp));
- vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
- }
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
}
-
- reduc_def = op.ops[reduc_idx];
reduc_chain_length++;
}
+ stmt_info = SLP_TREE_REPRESENTATIVE (slp_for_stmt_info);
+
/* PHIs should not participate in patterns. */
gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
- /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
- element. */
- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
- {
- gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
- stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
- }
- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
- gcc_assert (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
-
/* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop, unless
it's a reduction chain. */
{
/* When vectorizing a reduction chain w/o SLP the reduction PHI
is not directy used in stmt. */
- if (!only_slp_reduc_chain
- && reduc_chain_length != 1)
+ if (reduc_chain_length != 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
/* All but single defuse-cycle optimized and fold-left reductions go
through their own vectorizable_* routines. */
+ stmt_vec_info tem
+ = SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (slp_node_instance));
if (!single_defuse_cycle && reduction_type != FOLD_LEFT_REDUCTION)
+ STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
+ else
{
- stmt_vec_info tem
- = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
- if (REDUC_GROUP_FIRST_ELEMENT (tem))
- {
- gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
- tem = REDUC_GROUP_FIRST_ELEMENT (tem);
- }
- STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
- STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
+ STMT_VINFO_DEF_TYPE (tem) = vect_reduction_def;
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ vect_reduction_update_partial_vector_usage (loop_vinfo, reduc_info,
+ slp_node, op.code, op.type,
+ vectype_in);
}
- else if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- vect_reduction_update_partial_vector_usage (loop_vinfo, reduc_info,
- slp_node, op.code, op.type,
- vectype_in);
return true;
}
int i;
bool nested_cycle = false;
int vec_num;
- const bool reduc_chain
- = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
if (nested_in_vect_loop_p (loop, stmt_info))
{
vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
unsigned int num_phis = stmts.length ();
- if (reduc_chain)
+ if (reduc_info->is_reduc_chain)
num_phis = 1;
initial_values.reserve (num_phis);
for (unsigned int i = 0; i < num_phis; ++i)
Return FALSE if SLP build fails. */
static bool
-vect_analyze_slp_reduc_chain (vec_info *vinfo,
+vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
scalar_stmts_to_slp_tree_map_t *bst_map,
- stmt_vec_info stmt_info,
+ vec<stmt_vec_info> &scalar_stmts,
+ stmt_vec_info reduc_phi_info,
unsigned max_tree_size, unsigned *limit)
{
- vec<stmt_vec_info> scalar_stmts;
-
- /* Collect the reduction stmts and store them in scalar_stmts. */
- scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info));
- stmt_vec_info next_info = stmt_info;
- while (next_info)
- {
- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
- next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
- }
- /* Mark the first element of the reduction chain as reduction to properly
- transform the node. In the reduction analysis phase only the last
- element of the chain is marked as reduction. */
- STMT_VINFO_DEF_TYPE (stmt_info)
- = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
- STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
+ /* If there's no budget left bail out early. */
+ if (*limit == 0)
+ return false;
/* Build the tree for the SLP instance. */
vec<stmt_vec_info> root_stmt_infos = vNULL;
vec<tree> remain = vNULL;
- /* If there's no budget left bail out early. */
- if (*limit == 0)
- return false;
-
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- "Starting SLP discovery for\n");
+ "Starting SLP discovery of reduction chain for\n");
for (unsigned i = 0; i < scalar_stmts.length (); ++i)
dump_printf_loc (MSG_NOTE, vect_location,
" %G", scalar_stmts[i]->stmt);
poly_uint64 max_nunits = 1;
unsigned tree_size = 0;
+ /* ??? We need this only for SLP discovery. */
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
+
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, matches, limit,
&tree_size, bst_map);
+
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = NULL;
+
if (node != NULL)
{
- /* Calculate the unrolling factor based on the smallest type. */
- poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
+ SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
- if (maybe_ne (unrolling_factor, 1U)
- && is_a <bb_vec_info> (vinfo))
+ vect_reduc_info reduc_info = info_for_reduction (vinfo, node);
+ reduc_info->is_reduc_chain = true;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
+
+ /* Fixup SLP reduction chains. If this is a reduction chain with
+ a conversion in front amend the SLP tree with a node for that. */
+ gimple *scalar_def = STMT_VINFO_REDUC_DEF (reduc_phi_info)->stmt;
+ if (is_gimple_assign (scalar_def)
+ && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (scalar_def)))
+ {
+ stmt_vec_info conv_info = vect_stmt_to_vectorize
+ (STMT_VINFO_REDUC_DEF (reduc_phi_info));
+ scalar_stmts = vNULL;
+ scalar_stmts.create (group_size);
+ for (unsigned i = 0; i < group_size; ++i)
+ scalar_stmts.quick_push (conv_info);
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
+ SLP_TREE_VECTYPE (conv)
+ = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE
+ (gimple_assign_lhs (scalar_def)),
+ group_size);
+ SLP_TREE_REDUC_IDX (conv) = 0;
+ conv->cycle_info.id = node->cycle_info.id;
+ SLP_TREE_CHILDREN (conv).quick_push (node);
+ SLP_INSTANCE_TREE (new_instance) = conv;
+ }
+ /* Fill the backedge child of the PHI SLP node. The
+ general matching code cannot find it because the
+ scalar code does not reflect how we vectorize the
+ reduction. */
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+ class loop *loop = LOOP_VINFO_LOOP (vinfo);
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
+ gimple_get_lhs (scalar_def))
+ /* There are exactly two non-debug uses, the reduction
+ PHI and the loop-closed PHI node. */
+ if (!is_gimple_debug (USE_STMT (use_p))
+ && gimple_bb (USE_STMT (use_p)) == loop->header)
+ {
+ auto_vec<stmt_vec_info, 64> phis (group_size);
+ stmt_vec_info phi_info = vinfo->lookup_stmt (USE_STMT (use_p));
+ for (unsigned i = 0; i < group_size; ++i)
+ phis.quick_push (phi_info);
+ slp_tree *phi_node = bst_map->get (phis);
+ unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
+ SLP_TREE_CHILDREN (*phi_node)[dest_idx]
+ = SLP_INSTANCE_TREE (new_instance);
+ SLP_INSTANCE_TREE (new_instance)->refcnt++;
+ }
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
{
- unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
- || const_max_nunits > group_size)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
+ }
+
+ return true;
+ }
+ /* Failed to SLP. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP discovery of reduction chain failed\n");
+ return false;
+}
+
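+/* Analyze an SLP instance for the reduction stmt SCALAR_STMT. Unless
+ FORCE_SINGLE_LANE, first try to gather a reduction chain by following
+ the reduction operands from SCALAR_STMT and run SLP discovery on that;
+ otherwise, or if that fails, run SLP discovery for a single-lane
+ reduction on SCALAR_STMT alone. Return true if successful. */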
+
+static bool
+vect_analyze_slp_reduction (loop_vec_info vinfo,
+ stmt_vec_info scalar_stmt,
+ unsigned max_tree_size, unsigned *limit,
+ scalar_stmts_to_slp_tree_map_t *bst_map,
+ bool force_single_lane)
+{
+ slp_instance_kind kind = slp_inst_kind_reduc_group;
+
+ /* If there's no budget left bail out early. */
+ if (*limit == 0)
+ return false;
+
+ vec<stmt_vec_info> scalar_stmts = vNULL;
+ /* Try to gather a reduction chain. */
+ if (! force_single_lane
+ && STMT_VINFO_DEF_TYPE (scalar_stmt) == vect_reduction_def)
+ {
+ bool fail = false;
+ /* ??? We could leave operation code checking to SLP discovery. */
+ code_helper code
+ = STMT_VINFO_REDUC_CODE (STMT_VINFO_REDUC_DEF
+ (vect_orig_stmt (scalar_stmt)));
+ bool first = true;
+ stmt_vec_info next_stmt = scalar_stmt;
+ do
+ {
+ stmt_vec_info stmt = next_stmt;
+ gimple_match_op op;
+ if (!gimple_extract_op (STMT_VINFO_STMT (stmt), &op))
+ gcc_unreachable ();
+ tree reduc_def = gimple_arg (STMT_VINFO_STMT (stmt),
+ STMT_VINFO_REDUC_IDX (stmt));
+ next_stmt = vect_stmt_to_vectorize (vinfo->lookup_def (reduc_def));
+ gcc_assert (is_a <gphi *> (STMT_VINFO_STMT (next_stmt))
+ || STMT_VINFO_REDUC_IDX (next_stmt) != -1);
+ if (!gimple_extract_op (STMT_VINFO_STMT (vect_orig_stmt (stmt)), &op))
+ gcc_unreachable ();
+ if (CONVERT_EXPR_CODE_P (op.code)
+ && (first
+ || is_a <gphi *> (STMT_VINFO_STMT (next_stmt))))
+ ;
+ else if (code != op.code)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: store group "
- "size not a multiple of the vector size "
- "in basic block SLP\n");
- vect_free_slp_tree (node);
- return false;
+ fail = true;
+ break;
}
- /* Fatal mismatch. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "SLP discovery succeeded but node needs "
- "splitting\n");
- memset (matches, true, group_size);
- matches[group_size / const_max_nunits * const_max_nunits] = false;
- vect_free_slp_tree (node);
+ else
+ scalar_stmts.safe_push (stmt);
+ first = false;
}
- else
+ while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt)));
+ if (!fail && scalar_stmts.length () > 1)
{
- /* Create a new SLP instance. */
- slp_instance new_instance = XNEW (class _slp_instance);
- SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_LOADS (new_instance) = vNULL;
- SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
- SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
- SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
- new_instance->reduc_phis = NULL;
- new_instance->cost_vec = vNULL;
- new_instance->subgraph_entries = vNULL;
+ scalar_stmts.reverse ();
+ if (vect_analyze_slp_reduc_chain (vinfo, bst_map, scalar_stmts,
+ next_stmt, max_tree_size, limit))
+ return true;
+ scalar_stmts.release ();
+ }
+ }
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "SLP size %u vs. limit %u.\n",
- tree_size, max_tree_size);
+ scalar_stmts.create (1);
+ scalar_stmts.quick_push (scalar_stmt);
- /* Fixup SLP reduction chains. If this is a reduction chain with
- a conversion in front amend the SLP tree with a node for that. */
- gimple *scalar_def
- = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
- if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
- {
- /* Get at the conversion stmt - we know it's the single use
- of the last stmt of the reduction chain. */
- use_operand_p use_p;
- bool r = single_imm_use (gimple_assign_lhs (scalar_def),
- &use_p, &scalar_def);
- gcc_assert (r);
- stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
- next_info = vect_stmt_to_vectorize (next_info);
- scalar_stmts = vNULL;
- scalar_stmts.create (group_size);
- for (unsigned i = 0; i < group_size; ++i)
- scalar_stmts.quick_push (next_info);
- slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
- SLP_TREE_VECTYPE (conv)
- = get_vectype_for_scalar_type (vinfo,
- TREE_TYPE
- (gimple_assign_lhs (scalar_def)),
- group_size);
- SLP_TREE_REDUC_IDX (conv) = 0;
- conv->cycle_info.id = node->cycle_info.id;
- SLP_TREE_CHILDREN (conv).quick_push (node);
- SLP_INSTANCE_TREE (new_instance) = conv;
- /* We also have to fake this conversion stmt as SLP reduction
- group so we don't have to mess with too much code
- elsewhere. */
- REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
- REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
- }
- /* Fill the backedge child of the PHI SLP node. The
- general matching code cannot find it because the
- scalar code does not reflect how we vectorize the
- reduction. */
- use_operand_p use_p;
- imm_use_iterator imm_iter;
- class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
- gimple_get_lhs (scalar_def))
- /* There are exactly two non-debug uses, the reduction
- PHI and the loop-closed PHI node. */
- if (!is_gimple_debug (USE_STMT (use_p))
- && gimple_bb (USE_STMT (use_p)) == loop->header)
- {
- auto_vec<stmt_vec_info, 64> phis (group_size);
- stmt_vec_info phi_info
- = vinfo->lookup_stmt (USE_STMT (use_p));
- for (unsigned i = 0; i < group_size; ++i)
- phis.quick_push (phi_info);
- slp_tree *phi_node = bst_map->get (phis);
- unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
- SLP_TREE_CHILDREN (*phi_node)[dest_idx]
- = SLP_INSTANCE_TREE (new_instance);
- SLP_INSTANCE_TREE (new_instance)->refcnt++;
- }
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Starting SLP discovery for\n");
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " %G", scalar_stmts[i]->stmt);
+ }
- vinfo->slp_instances.safe_push (new_instance);
+ /* Build the tree for the SLP instance. */
+ unsigned int group_size = scalar_stmts.length ();
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0;
- /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
- the number of scalar stmts in the root in a few places.
- Verify that assumption holds. */
- gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
- .length () == group_size);
+ slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ &max_nunits, matches, limit,
+ &tree_size, bst_map);
+ if (node != NULL)
+ {
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+ SLP_INSTANCE_KIND (new_instance) = kind;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Final SLP tree for instance %p:\n",
- (void *) new_instance);
- vect_print_slp_graph (MSG_NOTE, vect_location,
- SLP_INSTANCE_TREE (new_instance));
- }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
- return true;
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
}
+
+ return true;
}
/* Failed to SLP. */
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
{
- /* Find SLP sequences starting from reduction chains. */
- FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
- if (! STMT_VINFO_RELEVANT_P (first_element)
- && ! STMT_VINFO_LIVE_P (first_element))
- ;
- else if (force_single_lane
- || ! vect_analyze_slp_reduc_chain (vinfo, bst_map,
- first_element,
- max_tree_size, &limit))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP discovery of reduction chain failed\n");
- /* Dissolve reduction chain group. */
- stmt_vec_info vinfo = first_element;
- stmt_vec_info last = NULL;
- while (vinfo)
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
- REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
- REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- last = vinfo;
- vinfo = next;
- }
- STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
- /* ??? When there's a conversion around the reduction
- chain 'last' isn't the entry of the reduction. */
- if (STMT_VINFO_DEF_TYPE (last) != vect_reduction_def)
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- /* It can be still vectorized as part of an SLP reduction. */
- loop_vinfo->reductions.safe_push (last);
- }
-
/* Find SLP sequences starting from groups of reductions. */
if (loop_vinfo->reductions.length () > 0)
{
if (!force_single_lane
&& !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
scalar_stmts.quick_push (next_info);
- else
- {
- /* Do SLP discovery for single-lane reductions. */
- vec<stmt_vec_info> stmts;
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- stmts.create (1);
- stmts.quick_push (next_info);
- if (! vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map,
- force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
+ /* Do SLP discovery for single-lane reductions. */
+ else if (! vect_analyze_slp_reduction (loop_vinfo, next_info,
+ max_tree_size, &limit,
+ bst_map,
+ force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
/* Save for re-processing on failure. */
scalar_stmts.release ();
/* Do SLP discovery for single-lane reductions. */
for (auto stmt_info : saved_stmts)
- {
- vec<stmt_vec_info> stmts;
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- stmts.create (1);
- stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
- if (! vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
+ if (! vect_analyze_slp_reduction (loop_vinfo,
+ vect_stmt_to_vectorize
+ (stmt_info),
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
saved_stmts.release ();
}