tree dataref_ptr = NULL_TREE;
tree dataref_offset = NULL_TREE;
gimple *ptr_incr = NULL;
- int ncopies;
int i, j;
unsigned int group_size;
poly_uint64 group_gap_adj;
bool compute_in_loop = false;
class loop *at_loop;
int vec_num;
- bool slp = (slp_node != NULL);
bool slp_perm = false;
bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
poly_uint64 vf;
return false;
mask_index = internal_fn_mask_index (ifn);
- if (mask_index >= 0 && slp_node)
+ if (mask_index >= 0)
mask_index = vect_slp_child_index_for_operand
(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (mask_index >= 0
return false;
els_index = internal_fn_else_index (ifn);
- if (els_index >= 0 && slp_node)
+ if (els_index >= 0)
els_index = vect_slp_child_index_for_operand
(call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (els_index >= 0
else
vf = 1;
- /* Multiple types in SLP are handled by creating the appropriate number of
- vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
- case of SLP. */
- if (slp)
- ncopies = 1;
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
-
- gcc_assert (ncopies >= 1);
-
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop
- && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
+ && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
- /* Invalidate assumptions made by dependence analysis when vectorization
- on the unrolled body effectively re-orders stmts. */
- if (ncopies > 1
- && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
- && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
- STMT_VINFO_MIN_NEG_DIST (stmt_info)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "cannot perform implicit CSE when unrolling "
- "with negative dependence distance\n");
- return false;
- }
-
elem_type = TREE_TYPE (vectype);
mode = TYPE_MODE (vectype);
first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
group_size = DR_GROUP_SIZE (first_stmt_info);
- /* Refuse non-SLP vectorization of SLP-only groups. */
- if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "cannot vectorize load in non-SLP mode.\n");
- return false;
- }
-
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
int maskload_elsval = 0;
bool need_zeroing = false;
if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
- ncopies, &memory_access_type, &poffset,
+ 1, &memory_access_type, &poffset,
&alignment_support_scheme, &misalignment, &gs_info,
&lanes_ifn, &elsvals))
return false;
/* ??? The following checks should really be part of
get_group_load_store_type. */
- if (slp
- && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
&& !((memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_GATHER_SCATTER)
&& SLP_TREE_LANES (slp_node) == 1))
}
}
- if (slp_node
- && slp_node->ldst_lanes
+ if (slp_node->ldst_lanes
&& memory_access_type != VMAT_LOAD_STORE_LANES)
{
if (dump_enabled_p ())
if (costing_p) /* transformation not required. */
{
- if (slp_node
- && mask
+ if (mask
&& !vect_maybe_update_slp_op_vectype (slp_op,
mask_vectype))
{
return false;
}
- if (!slp)
- STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
- else
- SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
+ SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
if (elsvals.length ())
maskload_elsval = *elsvals.begin ();
- if (!slp)
- gcc_assert (memory_access_type
- == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
- else
- gcc_assert (memory_access_type
- == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
+ gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
if (dump_enabled_p () && !costing_p)
- dump_printf_loc (MSG_NOTE, vect_location,
- "transform load. ncopies = %d\n", ncopies);
+ dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
/* Transform. */
vectype, &gsi2);
}
gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
- if (slp)
- for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
- slp_node->push_vec_def (new_stmt);
- else
- {
- for (j = 0; j < ncopies; ++j)
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
- *vec_stmt = new_stmt;
- }
+ for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
+ slp_node->push_vec_def (new_stmt);
return true;
}
/* For SLP permutation support we need to load the whole group,
not only the number of vector stmts the permutation result
fits in. */
+ int ncopies;
if (slp_perm)
{
/* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
return true;
}
- if (memory_access_type == VMAT_GATHER_SCATTER
- || (!slp && memory_access_type == VMAT_CONTIGUOUS))
+ if (memory_access_type == VMAT_GATHER_SCATTER)
grouped_load = false;
if (grouped_load
- || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
+ || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
{
if (grouped_load)
{
}
/* For SLP vectorization we directly vectorize a subchain
without permutation. */
- if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+ if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
/* For BB vectorization always use the first stmt to base
the data ref pointer on. */
first_stmt_info_for_drptr
= vect_find_first_scalar_stmt_in_slp (slp_node);
- /* Check if the chain of loads is already vectorized. */
- if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
- /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
- ??? But we can only do so if there is exactly one
- as we have no way to get at the rest. Leave the CSE
- opportunity alone.
- ??? With the group load eventually participating
- in multiple different permutations (having multiple
- slp nodes which refer to the same group) the CSE
- is even wrong code. See PR56270. */
- && !slp)
- {
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
- return true;
- }
first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
group_gap_adj = 0;
/* VEC_NUM is the number of vect stmts to be created for this group. */
- if (slp)
- {
- grouped_load = false;
- /* If an SLP permutation is from N elements to N elements,
- and if one vector holds a whole number of N, we can load
- the inputs to the permutation in the same way as an
- unpermuted sequence. In other cases we need to load the
- whole group, not only the number of vector stmts the
- permutation result fits in. */
- unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
- if (nested_in_vect_loop)
- /* We do not support grouped accesses in a nested loop,
- instead the access is contiguous but it might be
- permuted. No gap adjustment is needed though. */
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- else if (slp_perm
- && (group_size != scalar_lanes
- || !multiple_p (nunits, group_size)))
- {
- /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
- variable VF; see vect_transform_slp_perm_load. */
- unsigned int const_vf = vf.to_constant ();
- unsigned int const_nunits = nunits.to_constant ();
- vec_num = CEIL (group_size * const_vf, const_nunits);
- group_gap_adj = vf * group_size - nunits * vec_num;
- }
- else
- {
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- group_gap_adj
- = group_size - scalar_lanes;
- }
- }
+ grouped_load = false;
+ /* If an SLP permutation is from N elements to N elements,
+ and if one vector holds a whole number of N, we can load
+ the inputs to the permutation in the same way as an
+ unpermuted sequence. In other cases we need to load the
+ whole group, not only the number of vector stmts the
+ permutation result fits in. */
+ unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
+ if (nested_in_vect_loop)
+ /* We do not support grouped accesses in a nested loop,
+ instead the access is contiguous but it might be
+ permuted. No gap adjustment is needed though. */
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ else if (slp_perm
+ && (group_size != scalar_lanes
+ || !multiple_p (nunits, group_size)))
+ {
+ /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
+ variable VF; see vect_transform_slp_perm_load. */
+ unsigned int const_vf = vf.to_constant ();
+ unsigned int const_nunits = nunits.to_constant ();
+ vec_num = CEIL (group_size * const_vf, const_nunits);
+ group_gap_adj = vf * group_size - nunits * vec_num;
+ }
else
- vec_num = group_size;
+ {
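+	  /* The SLP node only covers SCALAR_LANES of the GROUP_SIZE-wide
+	     group; the difference is the gap to skip between group
+	     instances.  */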
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ group_gap_adj = group_size - scalar_lanes;
+ }
ref_type = get_group_alias_ptr_type (first_stmt_info);
}
group_size = vec_num = 1;
group_gap_adj = 0;
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
- if (slp)
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
gcc_assert (alignment_support_scheme);
auto_vec<tree> vec_offsets;
auto_vec<tree> vec_masks;
if (mask && !costing_p)
- {
- if (slp_node)
- vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
- &vec_masks);
- else
- vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
- &vec_masks, mask_vectype);
- }
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
+ &vec_masks);
tree vec_mask = NULL_TREE;
tree vec_els = NULL_TREE;
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of cost each one by one. */
unsigned int n_adjacent_loads = 0;
- if (slp_node)
- ncopies = slp_node->vec_stmts_size / group_size;
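+  /* Each copy here is one load-lanes call filling GROUP_SIZE vectors,
+     so derive the number of copies from the SLP node's total number of
+     vector stmts.  */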
+ int ncopies = slp_node->vec_stmts_size / group_size;
for (j = 0; j < ncopies; j++)
{
if (costing_p)
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
- if (!slp)
- dr_chain.create (group_size);
/* Extract each vector into an SSA_NAME. */
for (unsigned i = 0; i < group_size; i++)
{
new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
vec_array, i, need_zeroing,
final_mask);
- if (slp)
- slp_node->push_vec_def (new_temp);
- else
- dr_chain.quick_push (new_temp);
+ slp_node->push_vec_def (new_temp);
}
- if (!slp)
- /* Record the mapping between SSA_NAMEs and statements. */
- vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
-
/* Record that VEC_ARRAY is now dead. */
vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
-
- if (!slp)
- dr_chain.release ();
-
- if (!slp_node)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
if (costing_p)
gcc_assert (!grouped_load && !slp_perm);
unsigned int inside_cost = 0, prologue_cost = 0;
- for (j = 0; j < ncopies; j++)
+
+ /* 1. Create the vector or array pointer update chain. */
+ if (!costing_p)
{
- /* 1. Create the vector or array pointer update chain. */
- if (j == 0 && !costing_p)
- {
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
- slp_node, &gs_info, &dataref_ptr,
- &vec_offsets);
- else
- dataref_ptr
- = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
- at_loop, offset, &dummy, gsi,
- &ptr_incr, false, bump);
- }
- else if (!costing_p)
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+ slp_node, &gs_info, &dataref_ptr,
+ &vec_offsets);
+ else
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop, offset, &dummy, gsi,
+ &ptr_incr, false, bump);
+ }
+
+ gimple *new_stmt = NULL;
+ for (i = 0; i < vec_num; i++)
+ {
+ tree final_mask = NULL_TREE;
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+ if (!costing_p)
{
- gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
- if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
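+	      /* Combine the loop mask controlling partial vectors with the
+		 mask operand of a conditional load; prepare_vec_mask ANDs
+		 them when both are present.  */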
+ if (mask)
+ vec_mask = vec_masks[i];
+ if (loop_masks)
+ final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+ vec_num, vectype, i);
+ if (vec_mask)
+ final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask, vec_mask, gsi);
+
+ if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
gsi, stmt_info, bump);
}
- gimple *new_stmt = NULL;
- for (i = 0; i < vec_num; i++)
+ /* 2. Create the vector-load in the loop. */
+ unsigned HOST_WIDE_INT align;
+ if (gs_info.ifn != IFN_LAST)
{
- tree final_mask = NULL_TREE;
- tree final_len = NULL_TREE;
- tree bias = NULL_TREE;
- if (!costing_p)
+ if (costing_p)
{
- if (mask)
- vec_mask = vec_masks[vec_num * j + i];
- if (loop_masks)
- final_mask
- = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num * ncopies, vectype,
- vec_num * j + i);
- if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
- final_mask, vec_mask, gsi);
-
- if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
- gsi, stmt_info, bump);
+ unsigned int cnunits = vect_nunits_for_cost (vectype);
+ inside_cost
+ = record_stmt_cost (cost_vec, cnunits, scalar_load,
+ slp_node, 0, vect_body);
+ continue;
}
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ vec_offset = vec_offsets[i];
+ tree zero = build_zero_cst (vectype);
+ tree scale = size_int (gs_info.scale);
- /* 2. Create the vector-load in the loop. */
- unsigned HOST_WIDE_INT align;
- if (gs_info.ifn != IFN_LAST)
+ if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
{
- if (costing_p)
- {
- unsigned int cnunits = vect_nunits_for_cost (vectype);
- inside_cost
- = record_stmt_cost (cost_vec, cnunits, scalar_load,
- slp_node, 0, vect_body);
- continue;
- }
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vec_offset = vec_offsets[vec_num * j + i];
- tree zero = build_zero_cst (vectype);
- tree scale = size_int (gs_info.scale);
-
- if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+ if (loop_lens)
+ final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num, vectype, i, 1);
+ else
+ final_len = build_int_cst (sizetype,
+ TYPE_VECTOR_SUBPARTS (vectype));
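+	      /* The length operand may need a target-specific bias (0 or -1);
+		 use the bias the loop was analyzed with.  */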
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ if (!final_mask)
{
- if (loop_lens)
- final_len
- = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num * ncopies, vectype,
- vec_num * j + i, 1);
- else
- final_len
- = build_int_cst (sizetype,
- TYPE_VECTOR_SUBPARTS (vectype));
- signed char biasval
- = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
- bias = build_int_cst (intQI_type_node, biasval);
- if (!final_mask)
- {
- mask_vectype = truth_type_for (vectype);
- final_mask = build_minus_one_cst (mask_vectype);
- }
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
}
+ }
- if (final_mask)
- {
- vec_els = vect_get_mask_load_else
- (maskload_elsval, vectype);
- if (type_mode_padding_p
- && maskload_elsval != MASK_LOAD_ELSE_ZERO)
- need_zeroing = true;
- }
+ if (final_mask)
+ {
+ vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ }
- gcall *call;
- if (final_len && final_mask)
- {
- if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
- call = gimple_build_call_internal (
- IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset,
- scale, zero, final_mask, vec_els, final_len, bias);
- else
- /* Non-vector offset indicates that prefer to take
- MASK_LEN_STRIDED_LOAD instead of the
- MASK_LEN_GATHER_LOAD with direct stride arg. */
- call = gimple_build_call_internal (
- IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset,
- zero, final_mask, vec_els, final_len, bias);
- }
- else if (final_mask)
- call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
- 6, dataref_ptr,
- vec_offset, scale,
- zero, final_mask,
- vec_els);
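+	  /* Pick the gather IFN based on which of the length and mask
+	     operands are present; a scalar (non-vector) offset selects the
+	     strided-load variant instead.  */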
+ gcall *call;
+ if (final_len && final_mask)
+ {
+ if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
+ call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
+ 8, dataref_ptr,
+ vec_offset, scale, zero,
+ final_mask, vec_els,
+ final_len, bias);
else
- call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
- dataref_ptr, vec_offset,
- scale, zero);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
+	      /* A non-vector offset indicates that we prefer to emit
+		 MASK_LEN_STRIDED_LOAD rather than MASK_LEN_GATHER_LOAD
+		 with a direct stride argument.  */
+ call = gimple_build_call_internal
+ (IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr,
+ vec_offset, zero, final_mask, vec_els, final_len,
+ bias);
+ }
+ else if (final_mask)
+ call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
+ 6, dataref_ptr,
+ vec_offset, scale,
+ zero, final_mask, vec_els);
+ else
+ call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
+ dataref_ptr, vec_offset,
+ scale, zero);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ }
+ else if (gs_info.decl)
+ {
+ /* The builtin decls path for gather is legacy, x86 only. */
+ gcc_assert (!final_len && nunits.is_constant ());
+ if (costing_p)
+ {
+ unsigned int cnunits = vect_nunits_for_cost (vectype);
+ inside_cost
+ = record_stmt_cost (cost_vec, cnunits, scalar_load,
+ slp_node, 0, vect_body);
+ continue;
+ }
+ poly_uint64 offset_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ if (known_eq (nunits, offset_nunits))
+ {
+ new_stmt = vect_build_one_gather_load_call
+ (vinfo, stmt_info, gsi, &gs_info,
+ dataref_ptr, vec_offsets[i], final_mask);
data_ref = NULL_TREE;
}
- else if (gs_info.decl)
+ else if (known_eq (nunits, offset_nunits * 2))
{
- /* The builtin decls path for gather is legacy, x86 only. */
- gcc_assert (!final_len && nunits.is_constant ());
- if (costing_p)
- {
- unsigned int cnunits = vect_nunits_for_cost (vectype);
- inside_cost
- = record_stmt_cost (cost_vec, cnunits, scalar_load,
- slp_node, 0, vect_body);
- continue;
- }
- poly_uint64 offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
- if (known_eq (nunits, offset_nunits))
- {
- new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr, vec_offsets[vec_num * j + i],
- final_mask);
- data_ref = NULL_TREE;
- }
- else if (known_eq (nunits, offset_nunits * 2))
- {
- /* We have a offset vector with half the number of
- lanes but the builtins will produce full vectype
- data with just the lower lanes filled. */
- new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
- final_mask);
- tree low = make_ssa_name (vectype);
- gimple_set_lhs (new_stmt, low);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
-
- /* now put upper half of final_mask in final_mask low. */
- if (final_mask
- && !SCALAR_INT_MODE_P
- (TYPE_MODE (TREE_TYPE (final_mask))))
- {
- int count = nunits.to_constant ();
- vec_perm_builder sel (count, count, 1);
- sel.quick_grow (count);
- for (int i = 0; i < count; ++i)
- sel[i] = i | (count / 2);
- vec_perm_indices indices (sel, 2, count);
- tree perm_mask = vect_gen_perm_mask_checked
- (TREE_TYPE (final_mask), indices);
- new_stmt = gimple_build_assign (NULL_TREE,
- VEC_PERM_EXPR,
- final_mask,
- final_mask,
- perm_mask);
- final_mask = make_ssa_name (TREE_TYPE (final_mask));
- gimple_set_lhs (new_stmt, final_mask);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- }
- else if (final_mask)
- {
- new_stmt = gimple_build_assign (NULL_TREE,
- VEC_UNPACK_HI_EXPR,
- final_mask);
- final_mask = make_ssa_name
- (truth_type_for (gs_info.offset_vectype));
- gimple_set_lhs (new_stmt, final_mask);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- }
-
- new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr,
- vec_offsets[2 * vec_num * j + 2 * i + 1],
- final_mask);
- tree high = make_ssa_name (vectype);
- gimple_set_lhs (new_stmt, high);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
+	      /* We have an offset vector with half the number of
+ lanes but the builtins will produce full vectype
+ data with just the lower lanes filled. */
+ new_stmt = vect_build_one_gather_load_call
+ (vinfo, stmt_info, gsi, &gs_info,
+ dataref_ptr, vec_offsets[2 * i], final_mask);
+ tree low = make_ssa_name (vectype);
+ gimple_set_lhs (new_stmt, low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- /* compose low + high. */
+ /* now put upper half of final_mask in final_mask low. */
+ if (final_mask
+ && !SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (final_mask))))
+ {
int count = nunits.to_constant ();
vec_perm_builder sel (count, count, 1);
sel.quick_grow (count);
for (int i = 0; i < count; ++i)
- sel[i] = i < count / 2 ? i : i + count / 2;
+ sel[i] = i | (count / 2);
vec_perm_indices indices (sel, 2, count);
- tree perm_mask
- = vect_gen_perm_mask_checked (vectype, indices);
- new_stmt = gimple_build_assign (NULL_TREE,
- VEC_PERM_EXPR,
- low, high, perm_mask);
- data_ref = NULL_TREE;
+ tree perm_mask = vect_gen_perm_mask_checked
+ (TREE_TYPE (final_mask), indices);
+ new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+ final_mask, final_mask,
+ perm_mask);
+ final_mask = make_ssa_name (TREE_TYPE (final_mask));
+ gimple_set_lhs (new_stmt, final_mask);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
- else if (known_eq (nunits * 2, offset_nunits))
+ else if (final_mask)
{
- /* We have a offset vector with double the number of
- lanes. Select the low/high part accordingly. */
- vec_offset = vec_offsets[(vec_num * j + i) / 2];
- if ((vec_num * j + i) & 1)
- {
- int count = offset_nunits.to_constant ();
- vec_perm_builder sel (count, count, 1);
- sel.quick_grow (count);
- for (int i = 0; i < count; ++i)
- sel[i] = i | (count / 2);
- vec_perm_indices indices (sel, 2, count);
- tree perm_mask = vect_gen_perm_mask_checked
- (TREE_TYPE (vec_offset), indices);
- new_stmt = gimple_build_assign (NULL_TREE,
- VEC_PERM_EXPR,
- vec_offset,
- vec_offset,
- perm_mask);
- vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
- gimple_set_lhs (new_stmt, vec_offset);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- }
- new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr, vec_offset, final_mask);
- data_ref = NULL_TREE;
+ new_stmt = gimple_build_assign (NULL_TREE,
+ VEC_UNPACK_HI_EXPR,
+ final_mask);
+ final_mask = make_ssa_name
+ (truth_type_for (gs_info.offset_vectype));
+ gimple_set_lhs (new_stmt, final_mask);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
- else
- gcc_unreachable ();
+
+ new_stmt = vect_build_one_gather_load_call
+ (vinfo, stmt_info, gsi, &gs_info, dataref_ptr,
+ vec_offsets[2 * i + 1], final_mask);
+ tree high = make_ssa_name (vectype);
+ gimple_set_lhs (new_stmt, high);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+ /* compose low + high. */
+ int count = nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ sel.quick_grow (count);
+ for (int i = 0; i < count; ++i)
+ sel[i] = i < count / 2 ? i : i + count / 2;
+ vec_perm_indices indices (sel, 2, count);
+ tree perm_mask
+ = vect_gen_perm_mask_checked (vectype, indices);
+ new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+ low, high, perm_mask);
+ data_ref = NULL_TREE;
}
- else
+ else if (known_eq (nunits * 2, offset_nunits))
{
- /* Emulated gather-scatter. */
- gcc_assert (!final_mask);
- unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
- if (costing_p)
+	      /* We have an offset vector with double the number of
+ lanes. Select the low/high part accordingly. */
+ vec_offset = vec_offsets[i / 2];
+ if (i & 1)
{
- /* For emulated gathers N offset vector element
- offset add is consumed by the load). */
- inside_cost = record_stmt_cost (cost_vec, const_nunits,
- vec_to_scalar,
- slp_node, 0, vect_body);
- /* N scalar loads plus gathering them into a
- vector. */
- inside_cost
- = record_stmt_cost (cost_vec, const_nunits, scalar_load,
- slp_node, 0, vect_body);
- inside_cost
- = record_stmt_cost (cost_vec, 1, vec_construct,
- slp_node, 0, vect_body);
- continue;
- }
- unsigned HOST_WIDE_INT const_offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
- .to_constant ();
- vec<constructor_elt, va_gc> *ctor_elts;
- vec_alloc (ctor_elts, const_nunits);
- gimple_seq stmts = NULL;
- /* We support offset vectors with more elements
- than the data vector for now. */
- unsigned HOST_WIDE_INT factor
- = const_offset_nunits / const_nunits;
- vec_offset = vec_offsets[(vec_num * j + i) / factor];
- unsigned elt_offset
- = ((vec_num * j + i) % factor) * const_nunits;
- tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
- tree scale = size_int (gs_info.scale);
- align = get_object_alignment (DR_REF (first_dr_info->dr));
- tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
- for (unsigned k = 0; k < const_nunits; ++k)
- {
- tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
- bitsize_int (k + elt_offset));
- tree idx
- = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
- vec_offset, TYPE_SIZE (idx_type), boff);
- idx = gimple_convert (&stmts, sizetype, idx);
- idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
- scale);
- tree ptr = gimple_build (&stmts, PLUS_EXPR,
- TREE_TYPE (dataref_ptr),
- dataref_ptr, idx);
- ptr = gimple_convert (&stmts, ptr_type_node, ptr);
- tree elt = make_ssa_name (TREE_TYPE (vectype));
- tree ref = build2 (MEM_REF, ltype, ptr,
- build_int_cst (ref_type, 0));
- new_stmt = gimple_build_assign (elt, ref);
- gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
- gimple_seq_add_stmt (&stmts, new_stmt);
- CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
+ int count = offset_nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ sel.quick_grow (count);
+ for (int i = 0; i < count; ++i)
+ sel[i] = i | (count / 2);
+ vec_perm_indices indices (sel, 2, count);
+ tree perm_mask = vect_gen_perm_mask_checked
+ (TREE_TYPE (vec_offset), indices);
+ new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+ vec_offset, vec_offset,
+ perm_mask);
+ vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
+ gimple_set_lhs (new_stmt, vec_offset);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
- gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
- new_stmt = gimple_build_assign (
- NULL_TREE, build_constructor (vectype, ctor_elts));
+ new_stmt = vect_build_one_gather_load_call
+ (vinfo, stmt_info, gsi, &gs_info,
+ dataref_ptr, vec_offset, final_mask);
data_ref = NULL_TREE;
}
-
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- /* DATA_REF is null if we've already built the statement. */
- if (data_ref)
+ else
+ gcc_unreachable ();
+ }
+ else
+ {
+ /* Emulated gather-scatter. */
+ gcc_assert (!final_mask);
+ unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
+ if (costing_p)
{
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- new_stmt = gimple_build_assign (vec_dest, data_ref);
+	      /* For emulated gathers N offset vector element
+		 extracts (we assume the scalar scaling and ptr
+		 + offset add is consumed by the load).  */
+ inside_cost = record_stmt_cost (cost_vec, const_nunits,
+ vec_to_scalar,
+ slp_node, 0, vect_body);
+ /* N scalar loads plus gathering them into a
+ vector. */
+ inside_cost
+ = record_stmt_cost (cost_vec, const_nunits, scalar_load,
+ slp_node, 0, vect_body);
+ inside_cost
+ = record_stmt_cost (cost_vec, 1, vec_construct,
+ slp_node, 0, vect_body);
+ continue;
}
- new_temp = need_zeroing
- ? make_ssa_name (vectype)
- : make_ssa_name (vec_dest, new_stmt);
- gimple_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-
- /* If we need to explicitly zero inactive elements emit a
- VEC_COND_EXPR that does so. */
- if (need_zeroing)
+ unsigned HOST_WIDE_INT const_offset_nunits
+	    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+ vec<constructor_elt, va_gc> *ctor_elts;
+ vec_alloc (ctor_elts, const_nunits);
+ gimple_seq stmts = NULL;
+ /* We support offset vectors with more elements
+ than the data vector for now. */
+ unsigned HOST_WIDE_INT factor
+ = const_offset_nunits / const_nunits;
+ vec_offset = vec_offsets[i / factor];
+ unsigned elt_offset = (i % factor) * const_nunits;
+ tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+ tree scale = size_int (gs_info.scale);
+ align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
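+	  /* Emulate the gather: for each lane extract the offset element,
+	     scale it, add it to the base pointer and emit a scalar load;
+	     the loaded elements are then combined into a vector
+	     CONSTRUCTOR.  */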
+ for (unsigned k = 0; k < const_nunits; ++k)
{
- vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
- vectype);
-
- tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
- new_stmt
- = gimple_build_assign (new_temp2, VEC_COND_EXPR,
- final_mask, new_temp, vec_els);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
- gsi);
- new_temp = new_temp2;
+ tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
+ bitsize_int (k + elt_offset));
+ tree idx = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
+ vec_offset, TYPE_SIZE (idx_type),
+ boff);
+ idx = gimple_convert (&stmts, sizetype, idx);
+ idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
+ tree ptr = gimple_build (&stmts, PLUS_EXPR,
+ TREE_TYPE (dataref_ptr),
+ dataref_ptr, idx);
+ ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+ tree elt = make_ssa_name (TREE_TYPE (vectype));
+ tree ref = build2 (MEM_REF, ltype, ptr,
+ build_int_cst (ref_type, 0));
+ new_stmt = gimple_build_assign (elt, ref);
+ gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
+ gimple_seq_add_stmt (&stmts, new_stmt);
+ CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
}
-
- /* Store vector loads in the corresponding SLP_NODE. */
- if (slp)
- slp_node->push_vec_def (new_stmt);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ new_stmt = gimple_build_assign (NULL_TREE,
+ build_constructor (vectype,
+ ctor_elts));
+ data_ref = NULL_TREE;
}
- if (!slp && !costing_p)
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
- }
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ /* DATA_REF is null if we've already built the statement. */
+ if (data_ref)
+ {
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ new_stmt = gimple_build_assign (vec_dest, data_ref);
+ }
+ new_temp = (need_zeroing
+ ? make_ssa_name (vectype)
+ : make_ssa_name (vec_dest, new_stmt));
+ gimple_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (!slp && !costing_p)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+ /* If we need to explicitly zero inactive elements emit a
+ VEC_COND_EXPR that does so. */
+ if (need_zeroing)
+ {
+ vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ vectype);
+
+ tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+ new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+ final_mask, new_temp, vec_els);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_temp = new_temp2;
+ }
+
+ /* Store vector loads in the corresponding SLP_NODE. */
+ slp_node->push_vec_def (new_stmt);
+ }
if (costing_p && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of cost each one by one. */
unsigned int n_adjacent_loads = 0;
- for (j = 0; j < ncopies; j++)
+
+ /* 1. Create the vector or array pointer update chain. */
+ if (!costing_p)
{
- /* 1. Create the vector or array pointer update chain. */
- if (j == 0 && !costing_p)
- {
- bool simd_lane_access_p
- = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
- if (simd_lane_access_p
- && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
- && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
- && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
- && integer_zerop (DR_INIT (first_dr_info->dr))
- && alias_sets_conflict_p (get_alias_set (aggr_type),
- get_alias_set (TREE_TYPE (ref_type)))
- && (alignment_support_scheme == dr_aligned
- || alignment_support_scheme == dr_unaligned_supported))
- {
- dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
- dataref_offset = build_int_cst (ref_type, 0);
- }
- else if (diff_first_stmt_info)
- {
- dataref_ptr
- = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
- aggr_type, at_loop, offset, &dummy,
- gsi, &ptr_incr, simd_lane_access_p,
- bump);
- /* Adjust the pointer by the difference to first_stmt. */
- data_reference_p ptrdr
- = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
- tree diff
- = fold_convert (sizetype,
- size_binop (MINUS_EXPR,
- DR_INIT (first_dr_info->dr),
- DR_INIT (ptrdr)));
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
- stmt_info, diff);
- if (alignment_support_scheme == dr_explicit_realign)
- {
- msq = vect_setup_realignment (vinfo,
- first_stmt_info_for_drptr, gsi,
- &realignment_token,
- alignment_support_scheme,
- dataref_ptr, &at_loop);
- gcc_assert (!compute_in_loop);
- }
+ bool simd_lane_access_p
+ = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
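+      /* For a SIMD lane access to a declared object at a known zero offset
+	 we can use the DR base address directly with a constant offset
+	 instead of creating a separate pointer IV.  */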
+ if (simd_lane_access_p
+ && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
+ && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
+ && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
+ && integer_zerop (DR_INIT (first_dr_info->dr))
+ && alias_sets_conflict_p (get_alias_set (aggr_type),
+ get_alias_set (TREE_TYPE (ref_type)))
+ && (alignment_support_scheme == dr_aligned
+ || alignment_support_scheme == dr_unaligned_supported))
+ {
+ dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+ dataref_offset = build_int_cst (ref_type, 0);
+ }
+ else if (diff_first_stmt_info)
+ {
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
+ aggr_type, at_loop, offset, &dummy,
+ gsi, &ptr_incr, simd_lane_access_p,
+ bump);
+ /* Adjust the pointer by the difference to first_stmt. */
+ data_reference_p ptrdr
+ = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
+ tree diff = fold_convert (sizetype,
+ size_binop (MINUS_EXPR,
+ DR_INIT (first_dr_info->dr),
+ DR_INIT (ptrdr)));
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+ stmt_info, diff);
+ if (alignment_support_scheme == dr_explicit_realign)
+ {
+ msq = vect_setup_realignment (vinfo,
+ first_stmt_info_for_drptr, gsi,
+ &realignment_token,
+ alignment_support_scheme,
+ dataref_ptr, &at_loop);
+ gcc_assert (!compute_in_loop);
}
- else
- dataref_ptr
- = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
- at_loop,
- offset, &dummy, gsi, &ptr_incr,
- simd_lane_access_p, bump);
- }
- else if (!costing_p)
- {
- gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
- if (dataref_offset)
- dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
- bump);
- else
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
- stmt_info, bump);
}
+ else
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop,
+ offset, &dummy, gsi, &ptr_incr,
+ simd_lane_access_p, bump);
+ }
+ else if (!costing_p)
+ {
+ gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+ if (dataref_offset)
+ dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+ else
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+ stmt_info, bump);
+ }
- if (grouped_load || slp_perm)
- dr_chain.create (vec_num);
+ if (grouped_load || slp_perm)
+ dr_chain.create (vec_num);
- gimple *new_stmt = NULL;
- for (i = 0; i < vec_num; i++)
+ gimple *new_stmt = NULL;
+ for (i = 0; i < vec_num; i++)
+ {
+ tree final_mask = NULL_TREE;
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+
+ if (!costing_p)
{
- tree final_mask = NULL_TREE;
- tree final_len = NULL_TREE;
- tree bias = NULL_TREE;
+ if (mask)
+ vec_mask = vec_masks[i];
+ if (loop_masks)
+ final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+ vec_num, vectype, i);
+ if (vec_mask)
+ final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask, vec_mask, gsi);
- if (!costing_p)
- {
- if (mask)
- vec_mask = vec_masks[vec_num * j + i];
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num * ncopies, vectype,
- vec_num * j + i);
- if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
- final_mask, vec_mask, gsi);
+ if (i > 0)
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+ gsi, stmt_info, bump);
+ }
- if (i > 0)
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
- gsi, stmt_info, bump);
- }
+ /* 2. Create the vector-load in the loop. */
+ switch (alignment_support_scheme)
+ {
+ case dr_aligned:
+ case dr_unaligned_supported:
+ {
+ if (costing_p)
+ break;
- /* 2. Create the vector-load in the loop. */
- switch (alignment_support_scheme)
- {
- case dr_aligned:
- case dr_unaligned_supported:
+ unsigned int misalign;
+ unsigned HOST_WIDE_INT align;
+ align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+ if (alignment_support_scheme == dr_aligned)
+ misalign = 0;
+ else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
{
- if (costing_p)
- break;
-
- unsigned int misalign;
- unsigned HOST_WIDE_INT align;
- align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
- if (alignment_support_scheme == dr_aligned)
- misalign = 0;
- else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
- {
- align
- = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
- misalign = 0;
- }
- else
- misalign = misalignment;
- if (dataref_offset == NULL_TREE
- && TREE_CODE (dataref_ptr) == SSA_NAME)
- set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
- misalign);
- align = least_bit_hwi (misalign | align);
-
- /* Compute IFN when LOOP_LENS or final_mask valid. */
- machine_mode vmode = TYPE_MODE (vectype);
- machine_mode new_vmode = vmode;
- internal_fn partial_ifn = IFN_LAST;
- if (loop_lens)
- {
- opt_machine_mode new_ovmode
- = get_len_load_store_mode (vmode, true, &partial_ifn);
- new_vmode = new_ovmode.require ();
- unsigned factor
- = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num * ncopies, vectype,
- vec_num * j + i, factor);
- }
- else if (final_mask)
- {
- if (!can_vec_mask_load_store_p (
- vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
- &partial_ifn))
- gcc_unreachable ();
- }
+ align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+ misalign = 0;
+ }
+ else
+ misalign = misalignment;
+ if (dataref_offset == NULL_TREE
+ && TREE_CODE (dataref_ptr) == SSA_NAME)
+ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
+ misalign);
+ align = least_bit_hwi (misalign | align);
+
+ /* Compute IFN when LOOP_LENS or final_mask valid. */
+ machine_mode vmode = TYPE_MODE (vectype);
+ machine_mode new_vmode = vmode;
+ internal_fn partial_ifn = IFN_LAST;
+ if (loop_lens)
+ {
+ opt_machine_mode new_ovmode
+ = get_len_load_store_mode (vmode, true, &partial_ifn);
+ new_vmode = new_ovmode.require ();
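+		  /* If the target handles the length in bytes the load is
+		     performed on a VnQI view and the length has to be scaled
+		     by the element size.  */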
+ unsigned factor
+ = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
+ final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num, vectype, i, factor);
+ }
+ else if (final_mask)
+ {
+ if (!can_vec_mask_load_store_p (vmode,
+ TYPE_MODE
+ (TREE_TYPE (final_mask)),
+ true, &partial_ifn))
+ gcc_unreachable ();
+ }
- if (partial_ifn == IFN_MASK_LEN_LOAD)
+ if (partial_ifn == IFN_MASK_LEN_LOAD)
+ {
+ if (!final_len)
{
- if (!final_len)
- {
- /* Pass VF value to 'len' argument of
- MASK_LEN_LOAD if LOOP_LENS is invalid. */
- final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
- }
- if (!final_mask)
- {
- /* Pass all ones value to 'mask' argument of
- MASK_LEN_LOAD if final_mask is invalid. */
- mask_vectype = truth_type_for (vectype);
- final_mask = build_minus_one_cst (mask_vectype);
- }
+ /* Pass VF value to 'len' argument of
+ MASK_LEN_LOAD if LOOP_LENS is invalid. */
+ final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
}
- if (final_len)
+ if (!final_mask)
{
- signed char biasval
- = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-
- bias = build_int_cst (intQI_type_node, biasval);
+ /* Pass all ones value to 'mask' argument of
+ MASK_LEN_LOAD if final_mask is invalid. */
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
}
+ }
+ if (final_len)
+ {
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ }
- tree vec_els;
+ tree vec_els;
- if (final_len)
- {
- tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
- gcall *call;
- if (partial_ifn == IFN_MASK_LEN_LOAD)
- {
- vec_els = vect_get_mask_load_else
- (maskload_elsval, vectype);
- if (type_mode_padding_p
- && maskload_elsval != MASK_LOAD_ELSE_ZERO)
- need_zeroing = true;
- call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
- 6, dataref_ptr, ptr,
- final_mask, vec_els,
- final_len, bias);
- }
- else
- call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
- dataref_ptr, ptr,
- final_len, bias);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
- data_ref = NULL_TREE;
-
- /* Need conversion if it's wrapped with VnQI. */
- if (vmode != new_vmode)
- {
- tree new_vtype = build_vector_type_for_mode (
- unsigned_intQI_type_node, new_vmode);
- tree var
- = vect_get_new_ssa_name (new_vtype, vect_simple_var);
- gimple_set_lhs (call, var);
- vect_finish_stmt_generation (vinfo, stmt_info, call,
- gsi);
- tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
- new_stmt = gimple_build_assign (vec_dest,
- VIEW_CONVERT_EXPR, op);
- }
- }
- else if (final_mask)
+ if (final_len)
+ {
+ tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ gcall *call;
+ if (partial_ifn == IFN_MASK_LEN_LOAD)
{
- tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
- vec_els = vect_get_mask_load_else
- (maskload_elsval, vectype);
+ vec_els = vect_get_mask_load_else (maskload_elsval,
+ vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
- gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
- dataref_ptr, ptr,
- final_mask,
- vec_els);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
- data_ref = NULL_TREE;
+ call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
+ 6, dataref_ptr, ptr,
+ final_mask, vec_els,
+ final_len, bias);
}
else
+ call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
+ dataref_ptr, ptr,
+ final_len, bias);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+
+ /* Need conversion if it's wrapped with VnQI. */
+ if (vmode != new_vmode)
{
- tree ltype = vectype;
- tree new_vtype = NULL_TREE;
- unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
- unsigned HOST_WIDE_INT dr_size
- = vect_get_scalar_dr_size (first_dr_info);
- poly_int64 off = 0;
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
- off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
- unsigned int vect_align
- = vect_known_alignment_in_bytes (first_dr_info, vectype,
- off);
- /* Try to use a single smaller load when we are about
- to load excess elements compared to the unrolled
- scalar loop. */
- if (known_gt ((vec_num * j + i + 1) * nunits,
- (group_size * vf - gap)))
+ tree new_vtype
+ = build_vector_type_for_mode (unsigned_intQI_type_node,
+ new_vmode);
+ tree var = vect_get_new_ssa_name (new_vtype,
+ vect_simple_var);
+ gimple_set_lhs (call, var);
+ vect_finish_stmt_generation (vinfo, stmt_info, call,
+ gsi);
+ tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
+ new_stmt = gimple_build_assign (vec_dest,
+ VIEW_CONVERT_EXPR, op);
+ }
+ }
+ else if (final_mask)
+ {
+ tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
+ dataref_ptr, ptr,
+ final_mask,
+ vec_els);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ }
+ else
+ {
+ tree ltype = vectype;
+ tree new_vtype = NULL_TREE;
+ unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+ unsigned HOST_WIDE_INT dr_size
+ = vect_get_scalar_dr_size (first_dr_info);
+ poly_int64 off = 0;
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+ unsigned int vect_align
+ = vect_known_alignment_in_bytes (first_dr_info, vectype,
+ off);
+ /* Try to use a single smaller load when we are about
+ to load excess elements compared to the unrolled
+ scalar loop. */
+ if (known_gt ((i + 1) * nunits,
+ (group_size * vf - gap)))
+ {
+ poly_uint64 remain = ((group_size * vf - gap) - i * nunits);
+ if (known_ge ((i + 1) * nunits - (group_size * vf - gap),
+ nunits))
+ /* DR will be unused. */
+ ltype = NULL_TREE;
+ else if (known_ge (vect_align,
+ tree_to_poly_uint64
+ (TYPE_SIZE_UNIT (vectype))))
+ /* Aligned access to excess elements is OK if
+ at least one element is accessed in the
+ scalar loop. */
+ ;
+ else if (known_gt (vect_align,
+ ((nunits - remain) * dr_size)))
+ /* Aligned access to the gap area when there's
+ at least one element in it is OK. */
+ ;
+ else
{
- poly_uint64 remain = ((group_size * vf - gap)
- - (vec_num * j + i) * nunits);
- if (known_ge ((vec_num * j + i + 1) * nunits
- - (group_size * vf - gap), nunits))
- /* DR will be unused. */
- ltype = NULL_TREE;
- else if (known_ge (vect_align,
- tree_to_poly_uint64
- (TYPE_SIZE_UNIT (vectype))))
- /* Aligned access to excess elements is OK if
- at least one element is accessed in the
- scalar loop. */
- ;
- else if (known_gt (vect_align,
- ((nunits - remain) * dr_size)))
- /* Aligned access to the gap area when there's
- at least one element in it is OK. */
- ;
- else
+ /* remain should now be > 0 and < nunits. */
+ unsigned num;
+ if (known_ne (remain, 0u)
+ && constant_multiple_p (nunits, remain, &num))
+ {
+ tree ptype;
+ new_vtype
+ = vector_vector_composition_type (vectype, num,
+ &ptype);
+ if (new_vtype)
+ ltype = ptype;
+ }
+ /* Else use multiple loads or a masked load? */
+ /* For loop vectorization we now should have
+ an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
+ set. */
+ if (loop_vinfo)
+ gcc_assert (new_vtype
+ || LOOP_VINFO_PEELING_FOR_GAPS
+ (loop_vinfo));
+ /* But still reduce the access size to the next
+ required power-of-two so peeling a single
+ scalar iteration is sufficient. */
+ unsigned HOST_WIDE_INT cremain;
+ if (remain.is_constant (&cremain))
{
- /* remain should now be > 0 and < nunits. */
- unsigned num;
- if (known_ne (remain, 0u)
- && constant_multiple_p (nunits, remain, &num))
+ unsigned HOST_WIDE_INT cpart_size
+ = 1 << ceil_log2 (cremain);
+ if (known_gt (nunits, cpart_size)
+ && constant_multiple_p (nunits, cpart_size,
+ &num))
{
tree ptype;
new_vtype
if (new_vtype)
ltype = ptype;
}
- /* Else use multiple loads or a masked load? */
- /* For loop vectorization we now should have
- an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
- set. */
- if (loop_vinfo)
- gcc_assert (new_vtype
- || LOOP_VINFO_PEELING_FOR_GAPS
- (loop_vinfo));
- /* But still reduce the access size to the next
- required power-of-two so peeling a single
- scalar iteration is sufficient. */
- unsigned HOST_WIDE_INT cremain;
- if (remain.is_constant (&cremain))
- {
- unsigned HOST_WIDE_INT cpart_size
- = 1 << ceil_log2 (cremain);
- if (known_gt (nunits, cpart_size)
- && constant_multiple_p (nunits, cpart_size,
- &num))
- {
- tree ptype;
- new_vtype
- = vector_vector_composition_type (vectype,
- num,
- &ptype);
- if (new_vtype)
- ltype = ptype;
- }
- }
}
}
- tree offset
- = (dataref_offset ? dataref_offset
- : build_int_cst (ref_type, 0));
- if (!ltype)
+ }
+ tree offset = (dataref_offset ? dataref_offset
+ : build_int_cst (ref_type, 0));
+ if (!ltype)
+ ;
+ else if (ltype != vectype
+ && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ poly_uint64 gap_offset
+ = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
+ - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
+ tree gapcst = build_int_cstu (ref_type, gap_offset);
+ offset = size_binop (PLUS_EXPR, offset, gapcst);
+ }
+ if (ltype)
+ {
+ data_ref = fold_build2 (MEM_REF, ltype,
+ dataref_ptr, offset);
+ if (alignment_support_scheme == dr_aligned)
;
- else if (ltype != vectype
- && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ else
+ TREE_TYPE (data_ref)
+ = build_aligned_type (TREE_TYPE (data_ref),
+ align * BITS_PER_UNIT);
+ }
+ if (!ltype)
+ data_ref = build_constructor (vectype, NULL);
+ else if (ltype != vectype)
+ {
+ vect_copy_ref_info (data_ref,
+ DR_REF (first_dr_info->dr));
+ tree tem = make_ssa_name (ltype);
+ new_stmt = gimple_build_assign (tem, data_ref);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+ gsi);
+ data_ref = NULL;
+ vec<constructor_elt, va_gc> *v;
+		  /* 'num' was computed above, either statically as two
+		     or via constant_multiple_p.  */
+ unsigned num
+ = (exact_div (tree_to_poly_uint64
+ (TYPE_SIZE_UNIT (vectype)),
+ tree_to_poly_uint64
+ (TYPE_SIZE_UNIT (ltype)))
+ .to_constant ());
+ vec_alloc (v, num);
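+		  /* Pad the partial load with zero vectors; for a reverse
+		     access the loaded piece goes in the last slot, otherwise
+		     in the first.  */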
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
- poly_uint64 gap_offset
- = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
- - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
- tree gapcst = build_int_cstu (ref_type, gap_offset);
- offset = size_binop (PLUS_EXPR, offset, gapcst);
+ while (--num)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
}
- if (ltype)
+ else
{
- data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
- if (alignment_support_scheme == dr_aligned)
- ;
- else
- TREE_TYPE (data_ref)
- = build_aligned_type (TREE_TYPE (data_ref),
- align * BITS_PER_UNIT);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ while (--num)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
}
- if (!ltype)
- data_ref = build_constructor (vectype, NULL);
- else if (ltype != vectype)
+ gcc_assert (new_vtype != NULL_TREE);
+ if (new_vtype == vectype)
+ new_stmt
+ = gimple_build_assign (vec_dest,
+ build_constructor (vectype, v));
+ else
{
- vect_copy_ref_info (data_ref,
- DR_REF (first_dr_info->dr));
- tree tem = make_ssa_name (ltype);
- new_stmt = gimple_build_assign (tem, data_ref);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
- gsi);
- data_ref = NULL;
- vec<constructor_elt, va_gc> *v;
- /* We've computed 'num' above to statically two
- or via constant_multiple_p. */
- unsigned num
- = (exact_div (tree_to_poly_uint64
- (TYPE_SIZE_UNIT (vectype)),
- tree_to_poly_uint64
- (TYPE_SIZE_UNIT (ltype)))
- .to_constant ());
- vec_alloc (v, num);
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
- {
- while (--num)
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- }
- else
- {
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- while (--num)
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
- }
- gcc_assert (new_vtype != NULL_TREE);
- if (new_vtype == vectype)
- new_stmt = gimple_build_assign (
- vec_dest, build_constructor (vectype, v));
- else
- {
- tree new_vname = make_ssa_name (new_vtype);
- new_stmt = gimple_build_assign (
- new_vname, build_constructor (new_vtype, v));
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- new_stmt = gimple_build_assign (
- vec_dest,
- build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
- }
+ tree new_vname = make_ssa_name (new_vtype);
+ new_stmt
+ = gimple_build_assign (new_vname,
+ build_constructor (new_vtype,
+ v));
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ new_stmt
+ = gimple_build_assign (vec_dest,
+ build1 (VIEW_CONVERT_EXPR,
+ vectype, new_vname));
}
}
- break;
}
- case dr_explicit_realign:
- {
- if (costing_p)
- break;
- tree ptr, bump;
-
- tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
-
- if (compute_in_loop)
- msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
- &realignment_token,
- dr_explicit_realign,
- dataref_ptr, NULL);
+ break;
+ }
+ case dr_explicit_realign:
+ {
+ if (costing_p)
+ break;
+ tree ptr, bump;
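+	    /* Explicit realignment: emit the two aligned loads that span the
+	       misaligned access; the REALIGN_LOAD combining them with the
+	       realignment token is generated after this switch.  */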
+
+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+
+ if (compute_in_loop)
+ msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
+ &realignment_token,
+ dr_explicit_realign,
+ dataref_ptr, NULL);
+
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ ptr = copy_ssa_name (dataref_ptr);
+ else
+ ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+ // For explicit realign the target alignment should be
+ // known at compile time.
+ unsigned HOST_WIDE_INT align
+ = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+ new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr,
+ build_int_cst
+ (TREE_TYPE (dataref_ptr),
+ -(HOST_WIDE_INT) align));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref = build2 (MEM_REF, vectype,
+ ptr, build_int_cst (ref_type, 0));
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_stmt = gimple_build_assign (vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ gimple_move_vops (new_stmt, stmt_info->stmt);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ msq = new_temp;
+
+ bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
+ bump = size_binop (MINUS_EXPR, bump, size_one_node);
+ ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
+ bump);
+ new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr,
+ build_int_cst (TREE_TYPE (ptr),
+ -(HOST_WIDE_INT) align));
+ if (TREE_CODE (ptr) == SSA_NAME)
+ ptr = copy_ssa_name (ptr, new_stmt);
+ else
+ ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
+ gimple_assign_set_lhs (new_stmt, ptr);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref = build2 (MEM_REF, vectype,
+ ptr, build_int_cst (ref_type, 0));
+ break;
+ }
+ case dr_explicit_realign_optimized:
+ {
+ if (costing_p)
+ break;
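+	    /* The realignment token and the first aligned load were created
+	       in the loop prologue; here only the aligned load of the
+	       following vector (LSQ) is emitted.  */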
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ new_temp = copy_ssa_name (dataref_ptr);
+ else
+ new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
+ // We should only be doing this if we know the target
+ // alignment at compile time.
+ unsigned HOST_WIDE_INT align
+ = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+ new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, dataref_ptr,
+ build_int_cst (TREE_TYPE (dataref_ptr),
+ -(HOST_WIDE_INT) align));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref = build2 (MEM_REF, vectype, new_temp,
+ build_int_cst (ref_type, 0));
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
- if (TREE_CODE (dataref_ptr) == SSA_NAME)
- ptr = copy_ssa_name (dataref_ptr);
- else
- ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
- // For explicit realign the target alignment should be
- // known at compile time.
- unsigned HOST_WIDE_INT align
- = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
- new_stmt = gimple_build_assign (
- ptr, BIT_AND_EXPR, dataref_ptr,
- build_int_cst (TREE_TYPE (dataref_ptr),
- -(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- data_ref
- = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- new_stmt = gimple_build_assign (vec_dest, data_ref);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- gimple_move_vops (new_stmt, stmt_info->stmt);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- msq = new_temp;
-
- bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
- bump = size_binop (MINUS_EXPR, bump, size_one_node);
- ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
- bump);
- new_stmt = gimple_build_assign (
- NULL_TREE, BIT_AND_EXPR, ptr,
- build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
- if (TREE_CODE (ptr) == SSA_NAME)
- ptr = copy_ssa_name (ptr, new_stmt);
- else
- ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
- gimple_assign_set_lhs (new_stmt, ptr);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- data_ref
- = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
- break;
- }
- case dr_explicit_realign_optimized:
- {
- if (costing_p)
- break;
- if (TREE_CODE (dataref_ptr) == SSA_NAME)
- new_temp = copy_ssa_name (dataref_ptr);
- else
- new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
- // We should only be doing this if we know the target
- // alignment at compile time.
- unsigned HOST_WIDE_INT align
- = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
- new_stmt = gimple_build_assign (
- new_temp, BIT_AND_EXPR, dataref_ptr,
- build_int_cst (TREE_TYPE (dataref_ptr),
- -(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- data_ref = build2 (MEM_REF, vectype, new_temp,
- build_int_cst (ref_type, 0));
- break;
- }
- default:
- gcc_unreachable ();
+ /* One common place to cost the vector load above for the
+ different alignment support schemes.  */
+ if (costing_p)
+ {
+ /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only need
+ to take care of the first stmt, whose stmt_info is
+ first_stmt_info; iterating vec_num times on it covers the cost
+ for the remaining stmts, consistent with the transform.  The
+ prologue cost for realign only needs to be counted once for the
+ whole group.  */
+ bool first_stmt_info_p = first_stmt_info == stmt_info;
+ bool add_realign_cost = first_stmt_info_p && i == 0;
+ if (memory_access_type == VMAT_CONTIGUOUS
+ || memory_access_type == VMAT_CONTIGUOUS_REVERSE
+ || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
+ && (!grouped_load || first_stmt_info_p)))
+ {
+ /* Leave realign cases alone to keep them simple. */
+ if (alignment_support_scheme == dr_explicit_realign_optimized
+ || alignment_support_scheme == dr_explicit_realign)
+ vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
+ alignment_support_scheme, misalignment,
+ add_realign_cost, &inside_cost,
+ &prologue_cost, cost_vec, cost_vec,
+ true);
+ else
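+ /* Plain contiguous loads are only counted here; they are
+ costed together later as adjacent loads.  */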
+ n_adjacent_loads++;
}
-
- /* One common place to cost the above vect load for different
- alignment support schemes. */
- if (costing_p)
+ }
+ else
+ {
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ /* DATA_REF is null if we've already built the statement. */
+ if (data_ref)
{
- /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
- only need to take care of the first stmt, whose
- stmt_info is first_stmt_info, vec_num iterating on it
- will cover the cost for the remaining, it's consistent
- with transforming. For the prologue cost for realign,
- we only need to count it once for the whole group. */
- bool first_stmt_info_p = first_stmt_info == stmt_info;
- bool add_realign_cost = first_stmt_info_p && i == 0;
- if (memory_access_type == VMAT_CONTIGUOUS
- || memory_access_type == VMAT_CONTIGUOUS_REVERSE
- || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
- && (!grouped_load || first_stmt_info_p)))
- {
- /* Leave realign cases alone to keep them simple. */
- if (alignment_support_scheme == dr_explicit_realign_optimized
- || alignment_support_scheme == dr_explicit_realign)
- vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
- alignment_support_scheme, misalignment,
- add_realign_cost, &inside_cost,
- &prologue_cost, cost_vec, cost_vec,
- true);
- else
- n_adjacent_loads++;
- }
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ new_stmt = gimple_build_assign (vec_dest, data_ref);
}
- else
- {
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- /* DATA_REF is null if we've already built the statement. */
- if (data_ref)
- {
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- new_stmt = gimple_build_assign (vec_dest, data_ref);
- }
- new_temp = need_zeroing
- ? make_ssa_name (vectype)
- : make_ssa_name (vec_dest, new_stmt);
- gimple_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_temp = (need_zeroing
+ ? make_ssa_name (vectype)
+ : make_ssa_name (vec_dest, new_stmt));
+ gimple_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- /* If we need to explicitly zero inactive elements emit a
- VEC_COND_EXPR that does so. */
- if (need_zeroing)
- {
- vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
- vectype);
+ /* If we need to explicitly zero inactive elements emit a
+ VEC_COND_EXPR that does so. */
+ if (need_zeroing)
+ {
+ vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ vectype);
- tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
- new_stmt
- = gimple_build_assign (new_temp2, VEC_COND_EXPR,
- final_mask, new_temp, vec_els);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
- gsi);
- new_temp = new_temp2;
- }
+ tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+ new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+ final_mask, new_temp, vec_els);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+ gsi);
+ new_temp = new_temp2;
}
+ }
- /* 3. Handle explicit realignment if necessary/supported.
- Create in loop:
- vec_dest = realign_load (msq, lsq, realignment_token) */
- if (!costing_p
- && (alignment_support_scheme == dr_explicit_realign_optimized
- || alignment_support_scheme == dr_explicit_realign))
- {
- lsq = gimple_assign_lhs (new_stmt);
- if (!realignment_token)
- realignment_token = dataref_ptr;
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
- lsq, realignment_token);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ /* 3. Handle explicit realignment if necessary/supported.
+ Create in loop:
+ vec_dest = realign_load (msq, lsq, realignment_token) */
+ if (!costing_p
+ && (alignment_support_scheme == dr_explicit_realign_optimized
+ || alignment_support_scheme == dr_explicit_realign))
+ {
+ lsq = gimple_assign_lhs (new_stmt);
+ if (!realignment_token)
+ realignment_token = dataref_ptr;
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
+ lsq, realignment_token);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (alignment_support_scheme == dr_explicit_realign_optimized)
- {
- gcc_assert (phi);
- if (i == vec_num - 1 && j == ncopies - 1)
- add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
- UNKNOWN_LOCATION);
- msq = lsq;
- }
+ if (alignment_support_scheme == dr_explicit_realign_optimized)
+ {
+ gcc_assert (phi);
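+ /* The LSQ of the last vector in this iteration becomes the MSQ
+ of the next loop iteration via the PHI on the latch edge;
+ within the iteration simply carry LSQ over as the new MSQ.  */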
+ if (i == vec_num - 1)
+ add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
+ UNKNOWN_LOCATION);
+ msq = lsq;
}
+ }
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ if (costing_p)
+ inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
+ slp_node, 0, vect_body);
+ else
{
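+ /* A negative-step access loaded the elements in reverse order;
+ emit a permute to restore the original element order.  */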
- if (costing_p)
- inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
- slp_node, 0, vect_body);
- else
- {
- tree perm_mask = perm_mask_for_reverse (vectype);
- new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
- perm_mask, stmt_info, gsi);
- new_stmt = SSA_NAME_DEF_STMT (new_temp);
- }
+ tree perm_mask = perm_mask_for_reverse (vectype);
+ new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
+ perm_mask, stmt_info, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
+ }
- /* Collect vector loads and later create their permutation in
- vect_transform_grouped_load (). */
- if (!costing_p && (grouped_load || slp_perm))
- dr_chain.quick_push (new_temp);
+ /* Collect vector loads and later create their permutation in
+ vect_transform_grouped_load (). */
+ if (!costing_p && (grouped_load || slp_perm))
+ dr_chain.quick_push (new_temp);
- /* Store vector loads in the corresponding SLP_NODE. */
- if (!costing_p && slp && !slp_perm)
- slp_node->push_vec_def (new_stmt);
+ /* Store vector loads in the corresponding SLP_NODE. */
+ if (!costing_p && !slp_perm)
+ slp_node->push_vec_def (new_stmt);
- /* With SLP permutation we load the gaps as well, without
- we need to skip the gaps after we manage to fully load
- all elements. group_gap_adj is DR_GROUP_SIZE here. */
- group_elt += nunits;
- if (!costing_p
- && maybe_ne (group_gap_adj, 0U)
- && !slp_perm
- && known_eq (group_elt, group_size - group_gap_adj))
- {
- poly_wide_int bump_val
- = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
- if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
- == -1)
- bump_val = -bump_val;
- tree bump = wide_int_to_tree (sizetype, bump_val);
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
- stmt_info, bump);
- group_elt = 0;
- }
- }
- /* Bump the vector pointer to account for a gap or for excess
- elements loaded for a permuted SLP load. */
+ /* With an SLP permutation we load the gaps as well; without one
+ we need to skip the gaps once all elements have been fully
+ loaded.  group_gap_adj is DR_GROUP_SIZE here.  */
+ group_elt += nunits;
if (!costing_p
&& maybe_ne (group_gap_adj, 0U)
- && slp_perm)
+ && !slp_perm
+ && known_eq (group_elt, group_size - group_gap_adj))
{
poly_wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
+ group_elt = 0;
}
+ }
+ /* Bump the vector pointer to account for a gap or for excess
+ elements loaded for a permuted SLP load. */
+ if (!costing_p
+ && maybe_ne (group_gap_adj, 0U)
+ && slp_perm)
+ {
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
+ if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+ bump_val = -bump_val;
+ tree bump = wide_int_to_tree (sizetype, bump_val);
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+ stmt_info, bump);
+ }
- if (slp && !slp_perm)
- continue;
-
- if (slp_perm)
+ if (slp_perm)
+ {
+ unsigned n_perms;
+ /* For SLP we know we've seen all possible uses of dr_chain so
+ direct vect_transform_slp_perm_load to DCE the unused parts.
+ ??? This is a hack to prevent compile-time issues as seen
+ in PR101120 and friends. */
+ if (costing_p)
{
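+ /* When only costing, query how many permute stmts the load
+ permutation needs and record them as vec_perm costs.  */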
- unsigned n_perms;
- /* For SLP we know we've seen all possible uses of dr_chain so
- direct vect_transform_slp_perm_load to DCE the unused parts.
- ??? This is a hack to prevent compile-time issues as seen
- in PR101120 and friends. */
- if (costing_p)
- {
- vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
- true, &n_perms, nullptr);
- inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
- slp_node, 0, vect_body);
- }
- else
- {
- bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
- gsi, vf, false, &n_perms,
- nullptr, true);
- gcc_assert (ok);
- }
+ vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
+ true, &n_perms, nullptr);
+ inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
+ slp_node, 0, vect_body);
}
else
{
- if (grouped_load)
- {
- gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
- /* We assume that the cost of a single load-lanes instruction
- is equivalent to the cost of DR_GROUP_SIZE separate loads.
- If a grouped access is instead being provided by a
- load-and-permute operation, include the cost of the
- permutes. */
- if (costing_p && first_stmt_info == stmt_info)
- {
- /* Uses an even and odd extract operations or shuffle
- operations for each needed permute. */
- int group_size = DR_GROUP_SIZE (first_stmt_info);
- int nstmts = ceil_log2 (group_size) * group_size;
- inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
- slp_node, 0, vect_body);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_model_load_cost: "
- "strided group_size = %d .\n",
- group_size);
- }
- else if (!costing_p)
- {
- vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
- group_size, gsi);
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
- }
- }
- else if (!costing_p)
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
+ gsi, vf, false, &n_perms,
+ nullptr, true);
+ gcc_assert (ok);
}
dr_chain.release ();
}
- if (!slp && !costing_p)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
if (costing_p)
{