info->ifn = gimple_call_internal_fn (call);
info->decl = NULL_TREE;
info->base = gimple_call_arg (call, 0);
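+ /* The alias pointer argument carries the access's TBAA type (via its
+ type) and its known scalar alignment (via its value).  */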
+ info->alias_ptr = gimple_call_arg
+ (call, internal_fn_alias_ptr_index (info->ifn));
info->offset = gimple_call_arg
(call, internal_fn_offset_index (info->ifn));
info->offset_dt = vect_unknown_def_type;
info->ifn = ifn;
info->decl = decl;
info->base = base;
+
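+ /* Encode the known object alignment (in bits) as the value of an alias
+ pointer constant; its type carries the TBAA information of DR_REF.  */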
+ info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
+
info->offset = off;
info->offset_dt = vect_unknown_def_type;
info->offset_vectype = offset_vectype;
alignment.
If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
if it is aligned, i.e., check if it is possible to vectorize it with different
- alignment. */
+ alignment. If GS_INFO is nonnull we are dealing with a gather/scatter access. */
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
- tree vectype, int misalignment)
+ tree vectype, int misalignment,
+ gather_scatter_info *gs_info)
{
- data_reference *dr = dr_info->dr;
+ data_reference *dr = dr_info ? dr_info->dr : nullptr;
stmt_vec_info stmt_info = dr_info->stmt;
machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
else if (dr_safe_speculative_read_required (stmt_info))
return dr_unaligned_unsupported;
- /* For now assume all conditional loads/stores support unaligned
- access without any special code. */
- if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
- if (gimple_call_internal_p (stmt)
- && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
- return dr_unaligned_supported;
-
if (loop_vinfo)
{
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
}
} */
- if (DR_IS_READ (dr))
+ if (dr && DR_IS_READ (dr))
{
if (can_implement_p (vec_realign_load_optab, mode)
&& (!targetm.vectorize.builtin_mask_for_load
bool is_packed = false;
tree type = TREE_TYPE (DR_REF (dr));
+ bool is_gather_scatter = gs_info != nullptr;
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
- is_packed = not_size_aligned (DR_REF (dr));
+ {
+ if (!is_gather_scatter || dr != nullptr)
+ is_packed = not_size_aligned (DR_REF (dr));
+ else
+ {
+ /* Gather-scatter accesses normally perform only component accesses,
+ so vector alignment is irrelevant for them. Targets like riscv do
+ care about scalar alignment in vector accesses, though, so check the
+ scalar alignment here. We determined the alias pointer as well as
+ the base alignment during pattern recognition and can reuse them here.
+
+ As we do not have an analyzed dataref we only know the alignment
+ of the reference itself and nothing about init, steps, etc.
+ For now don't try harder to determine the misalignment and
+ just assume it is unknown. We consider the type packed if its
+ scalar alignment is greater than one but lower than the natural
+ alignment (the size) of a vector element's type. */
+
+ gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
+ gcc_assert (dr == nullptr);
+
+ tree inner_vectype = TREE_TYPE (vectype);
+
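+ /* Both the recorded scalar alignment and TYPE_SIZE are in bits.  */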
+ unsigned HOST_WIDE_INT scalar_align
+ = tree_to_uhwi (gs_info->alias_ptr);
+ unsigned HOST_WIDE_INT inner_vectype_sz
+ = tree_to_uhwi (TYPE_SIZE (inner_vectype));
+
+ bool is_misaligned = scalar_align < inner_vectype_sz;
+ is_packed = scalar_align > 1 && is_misaligned;
+ }
+ }
if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
- is_packed, false))
+ is_packed,
+ is_gather_scatter))
return dr_unaligned_supported;
/* Unsupported. */
/* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
but we don't need to store that here. */
gs_info->base = NULL_TREE;
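+ /* Record the reference's alias pointer (TBAA type and known alignment)
+ for the gather/scatter we build from this access.  */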
+ gs_info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
gs_info->element_type = TREE_TYPE (vectype);
gs_info->offset = fold_convert (offset_type, step);
gs_info->offset_dt = vect_constant_def;
separated by the stride, until we have a complete vector.
Fall back to scalar accesses if that isn't possible. */
*memory_access_type = VMAT_STRIDED_SLP;
- else
+ else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
allows us to use contiguous accesses. */
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
+ && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
&& single_element_p
&& SLP_TREE_LANES (slp_node) == 1
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ *memory_access_type = VMAT_GATHER_SCATTER;
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+ elsvals))
+ gcc_unreachable ();
+ /* When using internal functions, we rely on pattern recognition
+ to convert the type of the offset to the type that the target
+ requires, with the result being a call to an internal function.
+ If that failed for some reason (e.g. because another pattern
+ took priority), just handle cases in which the offset already
+ has the right type. */
+ else if (GATHER_SCATTER_IFN_P (*gs_info)
+ && !is_gimple_call (stmt_info->stmt)
+ && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+ TREE_TYPE (gs_info->offset_vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s offset requires a conversion\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (!vect_is_simple_use (gs_info->offset, vinfo,
+ &gs_info->offset_dt,
+ &gs_info->offset_vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s index use not simple.\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+ {
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
+ || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
+ (gs_info->offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ }
+ }
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
|| *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
*poffset = neg_ldst_offset;
- if (*memory_access_type == VMAT_GATHER_SCATTER
- || *memory_access_type == VMAT_ELEMENTWISE
+ if (*memory_access_type == VMAT_ELEMENTWISE
+ || (*memory_access_type == VMAT_GATHER_SCATTER
+ && GATHER_SCATTER_LEGACY_P (*gs_info))
|| *memory_access_type == VMAT_STRIDED_SLP
|| *memory_access_type == VMAT_INVARIANT)
{
}
else
{
- *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
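+ /* Without an analyzed dataref we cannot compute the misalignment of
+ a gather/scatter access, so treat it as unknown.  */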
+ if (*memory_access_type == VMAT_GATHER_SCATTER
+ && !first_dr_info)
+ *misalignment = DR_MISALIGNMENT_UNKNOWN;
+ else
+ *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
*alignment_support_scheme
- = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
- *misalignment);
+ = vect_supportable_dr_alignment
+ (vinfo, first_dr_info, vectype, *misalignment,
+ *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
}
if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
*misalignment = DR_MISALIGNMENT_UNKNOWN;
*poffset = 0;
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- *memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
- elsvals))
- gcc_unreachable ();
- /* When using internal functions, we rely on pattern recognition
- to convert the type of the offset to the type that the target
- requires, with the result being a call to an internal function.
- If that failed for some reason (e.g. because another pattern
- took priority), just handle cases in which the offset already
- has the right type. */
- else if (GATHER_SCATTER_IFN_P (*gs_info)
- && !is_gimple_call (stmt_info->stmt)
- && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
- TREE_TYPE (gs_info->offset_vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s offset requires a conversion\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
- gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node);
- gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node);
- if (gs_info->ifn == IFN_LAST && !gs_info->decl)
- {
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
- || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported vector types for emulated "
- "gather.\n");
- return false;
- }
- }
- /* Gather-scatter accesses perform only component accesses, alignment
- is irrelevant for them. */
- *alignment_support_scheme = dr_unaligned_supported;
- }
- else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
- masked_p,
- vls_type, memory_access_type, poffset,
- alignment_support_scheme,
- misalignment, gs_info, lanes_ifn,
- elsvals))
+ if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
+ masked_p,
+ vls_type, memory_access_type, poffset,
+ alignment_support_scheme,
+ misalignment, gs_info, lanes_ifn,
+ elsvals))
return false;
if ((*memory_access_type == VMAT_ELEMENTWISE
"alignment. With non-contiguous memory vectorization"
" could read out of bounds at %G ",
STMT_VINFO_STMT (stmt_info));
- if (inbounds)
- LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
- else
- return false;
+ if (inbounds)
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ else
+ return false;
}
/* If this DR needs alignment for correctness, we must ensure the target
alignment is a constant power-of-two multiple of the amount read per
vector iteration or force masking. */
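+ /* Gather/scatter accesses are exempt here; their alignment support is
+ queried via the target's misalignment hook instead.  */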
if (dr_safe_speculative_read_required (stmt_info)
- && *alignment_support_scheme == dr_aligned)
+ && (*alignment_support_scheme == dr_aligned
+ && *memory_access_type != VMAT_GATHER_SCATTER))
{
/* We can only peel for loops, of course. */
gcc_checking_assert (loop_vinfo);
if (dump_enabled_p ()
&& memory_access_type != VMAT_ELEMENTWISE
- && memory_access_type != VMAT_GATHER_SCATTER
&& memory_access_type != VMAT_STRIDED_SLP
&& memory_access_type != VMAT_INVARIANT
&& alignment_support_scheme != dr_aligned)
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (
- IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+ IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, vec_oprnd, final_mask, final_len,
bias);
else
/* A non-vector offset indicates that we prefer to take
MASK_LEN_STRIDED_STORE instead of the
- IFN_MASK_SCATTER_STORE with direct stride arg. */
+ IFN_MASK_SCATTER_STORE with direct stride arg.
+ Similar to the gather case, the alignment has already
+ been checked for a scatter and we assume that the
+ strided store has the same requirements. */
call = gimple_build_call_internal (
IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
vec_offset, vec_oprnd, final_mask, final_len, bias);
}
else if (final_mask)
call = gimple_build_call_internal
- (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
+ (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, vec_oprnd, final_mask);
else
- call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
- dataref_ptr, vec_offset,
+ call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
+ dataref_ptr,
+ gs_info.alias_ptr,
+ vec_offset,
scale, vec_oprnd);
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
- gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
? &LOOP_VINFO_MASKS (loop_vinfo)
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
- dr_aligned or dr_unaligned_supported for masked operations. */
+ dr_aligned or dr_unaligned_supported for (non-length) masked
+ operations. */
gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
&& !mask
&& !loop_masks)
+ || memory_access_type == VMAT_GATHER_SCATTER
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
if (memory_access_type == VMAT_GATHER_SCATTER)
{
- gcc_assert (alignment_support_scheme == dr_aligned
- || alignment_support_scheme == dr_unaligned_supported);
gcc_assert (!grouped_load && !slp_perm);
unsigned int inside_cost = 0, prologue_cost = 0;
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
- 8, dataref_ptr,
+ 9, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, zero,
final_mask, vec_els,
final_len, bias);
}
else if (final_mask)
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
- 6, dataref_ptr,
+ 7, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale,
zero, final_mask, vec_els);
else
- call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
- dataref_ptr, vec_offset,
- scale, zero);
+ call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
+ dataref_ptr,
+ gs_info.alias_ptr,
+ vec_offset, scale, zero);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;