if (STMT_VINFO_LIVE_P (phi_info))
return false;
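+ /* vect_get_num_copies computes the number of vector stmts on demand;
+ for a loop this should be SLP_TREE_LANES times the vectorization
+ factor divided by the number of vector elements (the factor is 1
+ for basic-block SLP). */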
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ncopies = vect_get_num_copies (loop_vinfo, slp_node);
gcc_assert (ncopies >= 1);
/* Leave the scalar phi in place. */
return true;
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ vec_num = vect_get_num_copies (loop_vinfo, slp_node);
/* Check whether we should use a single PHI node and accumulate
vectors to one before the backedge. */
/* Vectorizes PHIs. */
bool
-vectorizable_phi (bb_vec_info,
+vectorizable_phi (bb_vec_info vinfo,
stmt_vec_info stmt_info,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
for the scalar and the vector PHIs. This avoids artificially
favoring the vector path (but may pessimize it in some cases). */
if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1)
- record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ record_stmt_cost (cost_vec, vect_get_num_copies (vinfo, slp_node),
vector_stmt, slp_node, vectype, 0, vect_body);
SLP_TREE_TYPE (slp_node) = phi_info_type;
return true;
return false;
tree vectype = SLP_TREE_VECTYPE (slp_node);
- unsigned ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ unsigned ncopies = vect_get_num_copies (loop_vinfo, slp_node);
poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
unsigned dist = SLP_TREE_LANES (slp_node);
/* We need to be able to make progress with a single vector. */
}
}
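+ /* Compute the number of vector stmts once up front; it is needed both
+ when only costing below and when generating the IVs. */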
+ unsigned nvects = vect_get_num_copies (loop_vinfo, slp_node);
if (cost_vec) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
return false;
}
/* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ inside_cost = record_stmt_cost (cost_vec, nvects,
vector_stmt, slp_node, 0, vect_body);
/* prologue cost for vec_init (if not nested) and step. */
prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
}
/* Now generate the IVs. */
- unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
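+ /* Assuming group_size is the number of SLP lanes here, nunits * nvects
+ equals group_size times the vectorization factor, so the assertion
+ below still holds with the on-demand count. */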
gcc_assert (multiple_p (nunits * nvects, group_size));
unsigned nivs;
unsigned HOST_WIDE_INT const_nunits;
all the slp vectors. Calculate which slp vector it is and the index
within. */
int num_scalar = SLP_TREE_LANES (slp_node);
- int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int num_vec = vect_get_num_copies (vinfo, slp_node);
poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
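+ /* For example, with two SLP lanes, nunits == 4 and a single vector
+ (lanes laid out as { s0, s1, s0, s1 }), slp_index 1 gives
+ pos == 1 * 4 - 2 + 1 == 3, the last occurrence of s1. */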
/* Calculate which vector contains the result, and which lane of
"the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else if (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ else if (num_vec > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
SLP_TREE_SCALAR_STMTS (this) = vNULL;
SLP_TREE_SCALAR_OPS (this) = vNULL;
SLP_TREE_VEC_DEFS (this) = vNULL;
- SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
SLP_TREE_CHILDREN (this) = vNULL;
SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
SLP_TREE_LANE_PERMUTATION (this) = vNULL;
slp_instance node_instance,
stmt_vector_for_cost *cost_vec)
{
- /* Calculate the number of vector statements to be created for the scalar
- stmts in this node. It is the number of scalar elements in one scalar
- iteration (DR_GROUP_SIZE) multiplied by VF divided by the number of
- elements in a vector. For single-defuse-cycle, lane-reducing op, and
- PHI statement that starts reduction comprised of only lane-reducing ops,
- the number is more than effective vector statements actually required. */
- if (SLP_TREE_VECTYPE (node))
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
- else
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
-
/* Handle purely internal nodes. */
if (SLP_TREE_PERMUTE_P (node))
{
-by NODE. */
+by NODE in the vectorization region VINFO. */
static void
-vect_prologue_cost_for_slp (slp_tree node,
+vect_prologue_cost_for_slp (vec_info *vinfo, slp_tree node,
stmt_vector_for_cost *cost_vec)
{
/* There's a special case of an existing vector, that costs nothing. */
unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
unsigned HOST_WIDE_INT const_nunits;
unsigned nelt_limit;
+ unsigned nvectors = vect_get_num_copies (vinfo, node);
auto ops = &SLP_TREE_SCALAR_OPS (node);
- auto_vec<unsigned int> starts (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ auto_vec<unsigned int> starts (nvectors);
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
&& ! multiple_p (const_nunits, group_size))
{
nelt_limit = const_nunits;
hash_set<vect_scalar_ops_slice_hash> vector_ops;
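+ /* Cost each distinct slice of scalar operands only once; duplicate
+ slices (e.g. ops { 1, 2, 1, 2 } split into two-element vectors)
+ need a single prologue vector. */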
- for (unsigned int i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i)
+ for (unsigned int i = 0; i < nvectors; ++i)
if (!vector_ops.add ({ ops, i * nelt_limit, nelt_limit }))
starts.quick_push (i * nelt_limit);
}
continue;
}
- SLP_TREE_NUMBER_OF_VEC_STMTS (child)
- = vect_get_num_copies (vinfo, child);
/* And cost them. */
- vect_prologue_cost_for_slp (child, cost_vec);
+ vect_prologue_cost_for_slp (vinfo, child, cost_vec);
}
/* If this node or any of its children can't be vectorized, try pruning
/* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
vector_type = SLP_TREE_VECTYPE (op_node);
- unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
+ unsigned int number_of_vectors = vect_get_num_copies (vinfo, op_node);
SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
auto_vec<tree> voprnds (number_of_vectors);
void
vect_get_slp_defs (slp_tree slp_node, vec<tree> *vec_defs)
{
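+ /* The vector defs have already been generated at this point, so the
+ length of SLP_TREE_VEC_DEFS gives the exact number to copy. */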
- vec_defs->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ vec_defs->create (SLP_TREE_VEC_DEFS (slp_node).length ());
vec_defs->splice (SLP_TREE_VEC_DEFS (slp_node));
}
mode = TYPE_MODE (vectype);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ unsigned int nstmts = vect_get_num_copies (vinfo, node);
/* Initialize the vect stmts of NODE to properly insert the generated
stmts later. */
if (n_loads)
{
if (repeating_p)
- *n_loads = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ *n_loads = nstmts;
else
{
/* Enforced above when !repeating_p. */
unsigned vec_idx = (SLP_TREE_LANE_PERMUTATION (node)[0].second
/ SLP_TREE_LANES (node));
unsigned vec_num = SLP_TREE_LANES (child) / SLP_TREE_LANES (node);
- for (unsigned i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i)
+ unsigned nvectors = vect_get_num_copies (vinfo, node);
+ for (unsigned i = 0; i < nvectors; ++i)
{
tree def = SLP_TREE_VEC_DEFS (child)[i * vec_num + vec_idx];
node->push_vec_def (def);
return;
}
- gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ());
-
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
- gcc_assert (!SLP_TREE_VECTYPE (node)
- || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
- if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
- SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ());
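+ /* Only nodes with a vector type produce vector defs. */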
+ if (SLP_TREE_VECTYPE (node))
+ SLP_TREE_VEC_DEFS (node).create (vect_get_num_copies (vinfo, node));
if (!SLP_TREE_PERMUTE_P (node) && STMT_VINFO_DATA_REF (stmt_info))
{
if (instance->kind == slp_inst_kind_ctor)
{
- if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
+ if (SLP_TREE_VEC_DEFS (node).length () == 1)
{
tree vect_lhs = SLP_TREE_VEC_DEFS (node)[0];
tree root_lhs = gimple_get_lhs (instance->root_stmts[0]->stmt);
vect_lhs);
rstmt = gimple_build_assign (root_lhs, vect_lhs);
}
- else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
+ else
{
- int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ gcc_assert (SLP_TREE_VEC_DEFS (node).length () > 1);
tree child_def;
int j;
vec<constructor_elt, va_gc> *v;
- vec_alloc (v, nelts);
+ vec_alloc (v, SLP_TREE_VEC_DEFS (node).length ());
/* A CTOR can handle V16HI composition from VNx8HI so we
do not need to convert vector elements if the types
are of the same KIND. */
static void
-vect_model_simple_cost (vec_info *, int n, slp_tree node,
+vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node,
stmt_vector_for_cost *cost_vec,
vect_cost_for_stmt kind = vector_stmt)
{
gcc_assert (cost_vec != NULL);
- n *= SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ n *= vect_get_num_copies (vinfo, node);
/* Pass the inside-of-loop statements to the target-specific cost model. */
inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body);
record_stmt_cost (cost_vec,
1, vector_stmt, slp_node, 0, vect_prologue);
record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vect_get_num_copies (vinfo, slp_node),
vec_perm, slp_node, 0, vect_body);
return true;
}
int len_opno = internal_fn_len_index (cond_len_fn);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
+ unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
if (cost_vec) /* transformation not required. */
{
for (i = 0; i < nargs; ++i)
}
else
{
- unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
tree scalar_mask = NULL_TREE;
if (mask_opno >= 0)
scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
/* Build argument list for the vectorized call. */
if (cfn == CFN_GOMP_SIMD_LANE)
{
- for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
+ for (i = 0; i < nvectors; ++i)
{
/* ??? For multi-lane SLP we'd need to build
{ 0, 0, .., 1, 1, ... }. */
return false;
}
DUMP_VECT_SCOPE ("vectorizable_conversion");
+ unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
if (modifier == NONE)
{
SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
{
SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
/* The final packing step produces one vector result per copy. */
- unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
vect_model_promotion_demotion_cost (slp_node, nvectors,
multi_step_cvt, cost_vec,
widen_arith);
/* The initial unpacking step produces two vector results
per copy. MULTI_STEP_CVT is 0 for a single conversion,
so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
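+ /* For example, assuming a QI to SI conversion through HI,
+ MULTI_STEP_CVT is 1 and 4 output copies are costed as
+ 4 >> 1 == 2 copies of the initial unpack. */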
- unsigned int nvectors
- = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt;
- vect_model_promotion_demotion_cost (slp_node, nvectors,
+ vect_model_promotion_demotion_cost (slp_node,
+ nvectors >> multi_step_cvt,
multi_step_cvt, cost_vec,
widen_arith);
}
scalar shift operand but code-generation below simply always
takes the first. */
if (dt[1] == vect_internal_def
- && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node),
stmts.length ()))
scalar_shift_arg = false;
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ unsigned nvectors = vect_get_num_copies (vinfo, slp_node);
if (scalar_shift_arg && dt[1] != vect_internal_def)
{
/* Vector shl and shr insn patterns can be defined with scalar
dump_printf_loc (MSG_NOTE, vect_location,
"operand 1 using scalar mode.\n");
vec_oprnd1 = op1;
- vec_oprnds1.create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ vec_oprnds1.create (nvectors);
vec_oprnds1.quick_push (vec_oprnd1);
- /* Store vec_oprnd1 for every vector stmt to be created.
- We check during the analysis that all the shift arguments
- are the same.
- TODO: Allow different constants for different vector
- stmts generated for an SLP instance. */
- for (k = 0;
- k < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) - 1; k++)
+ /* Store vec_oprnd1 for every vector stmt to be created.
+ We check during the analysis that all the shift arguments
+ are the same.
+ TODO: Allow different constants for different vector
+ stmts generated for an SLP instance. */
+ for (k = 0; k < nvectors - 1; k++)
vec_oprnds1.quick_push (vec_oprnd1);
}
}
gsi);
vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
gsi);
- vec_oprnds1.create (slp_node->vec_stmts_size);
- for (k = 0; k < slp_node->vec_stmts_size; k++)
+ vec_oprnds1.create (nvectors);
+ for (k = 0; k < nvectors; k++)
vec_oprnds1.quick_push (vec_oprnd1);
}
else if (dt[1] == vect_constant_def)
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. */
- auto vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ auto vec_num = vect_get_num_copies (vinfo, slp_node);
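+ /* For example, a two-lane node with a four-element vectype in a loop
+ with vectorization factor 4 needs 2 * 4 / 4 == 2 vector stmts. */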
/* Reject attempts to combine mask types with nonmask types, e.g. if
we have an AND between a (nonmask) boolean loaded from memory and
in the prologue and (mis-)costs one of the stmts as
vector stmt. See below for the actual lowering that will
be applied. */
- unsigned n = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ unsigned n = vect_get_num_copies (vinfo, slp_node);
switch (code)
{
case PLUS_EXPR:
}
else
vf = 1;
+ vec_num = vect_get_num_copies (vinfo, slp_node);
/* FORNOW. This restriction should be relaxed. */
if (loop
&& nested_in_vect_loop_p (loop, stmt_info)
- && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ && vec_num > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
{
first_stmt_info = stmt_info;
first_dr_info = dr_info;
- group_size = vec_num = 1;
+ group_size = 1;
}
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
|| !nested_in_vect_loop_p (loop, stmt_info));
grouped_store = false;
- /* VEC_NUM is the number of vect stmts to be created for this
- group. */
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
|| (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
if (nstores > 1)
align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
- int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int ncopies = vec_num;
if (!costing_p)
{
/* For costing some adjacent vector stores, we'd like to cost with
the total number of them once instead of costing each one by one. */
unsigned int n_adjacent_stores = 0;
- int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) / group_size;
+ int ncopies = vec_num / group_size;
for (j = 0; j < ncopies; j++)
{
if (j == 0)
else
vf = 1;
+ vec_num = vect_get_num_copies (vinfo, slp_node);
+
/* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop
- && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ if (nested_in_vect_loop && vec_num > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
vectype, &gsi2);
}
gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
- for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
+ for (j = 0; j < (int) vec_num; ++j)
slp_node->push_vec_def (new_stmt);
return true;
}
dr_chain.create (ncopies);
}
else
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ncopies = vec_num;
unsigned int group_el = 0;
unsigned HOST_WIDE_INT
/* We do not support grouped accesses in a nested loop,
instead the access is contiguous but it might be
permuted. No gap adjustment is needed though. */
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ;
else if (slp_perm
&& (group_size != scalar_lanes
|| !multiple_p (nunits, group_size)))
}
else
{
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap_adj = group_size - scalar_lanes;
}
{
first_stmt_info = stmt_info;
first_dr_info = dr_info;
- group_size = vec_num = 1;
+ group_size = 1;
group_gap_adj = 0;
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
vec_loop_masks *loop_masks
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of costing each one by one. */
unsigned int n_adjacent_loads = 0;
- int ncopies = slp_node->vec_stmts_size / group_size;
+ int ncopies = vec_num / group_size;
for (j = 0; j < ncopies; j++)
{
if (costing_p)
tree vectype = SLP_TREE_VECTYPE (slp_node);
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
- int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int vec_num = vect_get_num_copies (vinfo, slp_node);
cond_expr = gimple_assign_rhs1 (stmt);
gcc_assert (! COMPARISON_CLASS_P (cond_expr));
return false;
machine_mode mode = TYPE_MODE (vectype);
- int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int vec_num = vect_get_num_copies (loop_vinfo, slp_node);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);