#include "optabs.h"
#include "optabs-tree.h"
#include "stor-layout.h"
+#include "internal-fn.h"
/* Return the optab used for computing the operation given by the tree code,
CODE and the tree EXP. This function is not always usable (for example, it
or mask_len_{load,store}.
This helper function checks whether target supports masked
load/store and return corresponding IFN in the last argument
- (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
+ (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
+ If there is support and ELSVALS is nonzero store the possible else values
+ in the vector it points to. */
-static bool
+bool
target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
- bool is_load, internal_fn *ifn)
+ bool is_load, internal_fn *ifn,
+ vec<int> *elsvals)
{
optab op = is_load ? maskload_optab : maskstore_optab;
optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
- if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
+ enum insn_code icode;
+ if ((icode = convert_optab_handler (op, mode, mask_mode))
+ != CODE_FOR_nothing)
{
if (ifn)
*ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
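+ /* If requested, also collect the else values this maskload
+ pattern supports. */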
+ if (elsvals && is_load)
+ get_supported_else_vals (icode,
+ internal_fn_else_index (IFN_MASK_LOAD),
+ *elsvals);
return true;
}
- else if (convert_optab_handler (len_op, mode, mask_mode) != CODE_FOR_nothing)
+ else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
+ != CODE_FOR_nothing)
{
if (ifn)
*ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
+ if (elsvals && is_load)
+ get_supported_else_vals (icode,
+ internal_fn_else_index (IFN_MASK_LEN_LOAD),
+ *elsvals);
return true;
}
return false;
/* Return true if target supports vector masked load/store for mode.
An additional output in the last argument which is the IFN pointer.
We set IFN as MASK_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
- which optab is supported in the target. */
+ which optab is supported in the target.
+ If there is support and ELSVALS is nonzero store the possible else values
+ in the vector it points to. */
bool
can_vec_mask_load_store_p (machine_mode mode,
machine_mode mask_mode,
bool is_load,
- internal_fn *ifn)
+ internal_fn *ifn,
+ vec<int> *elsvals)
{
machine_mode vmode;
/* If mode is vector mode, check it directly. */
if (VECTOR_MODE_P (mode))
- return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
+ return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
+ elsvals);
/* Otherwise, return true if there is some vector mode with
the mask load/store supported. */
vmode = targetm.vectorize.preferred_simd_mode (smode);
if (VECTOR_MODE_P (vmode)
&& targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
- && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
+ && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
+ elsvals))
return true;
auto_vector_modes vector_modes;
for (machine_mode base_mode : vector_modes)
if (related_vector_mode (base_mode, smode).exists (&vmode)
&& targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
- && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
+ && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
+ elsvals))
return true;
return false;
}
or mask_len_{load,store}.
This helper function checks whether target supports len
load/store and return corresponding IFN in the last argument
- (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
+ (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
+ If there is support and ELSVALS is nonzero store the possible
+ else values in the vector it points to. */
static bool
target_supports_len_load_store_p (machine_mode mode, bool is_load,
- internal_fn *ifn)
+ internal_fn *ifn, vec<int> *elsvals)
{
optab op = is_load ? len_load_optab : len_store_optab;
optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
return true;
}
machine_mode mask_mode;
+ enum insn_code icode;
if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
- && convert_optab_handler (masked_op, mode, mask_mode) != CODE_FOR_nothing)
+ && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
+ != CODE_FOR_nothing))
{
if (ifn)
*ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
+ if (elsvals && is_load)
+ get_supported_else_vals (icode,
+ internal_fn_else_index (IFN_MASK_LEN_LOAD),
+ *elsvals);
return true;
}
return false;
VnQI to wrap the other supportable same size vector modes.
An additional output in the last argument which is the IFN pointer.
We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
- which optab is supported in the target. */
+ which optab is supported in the target.
+ If there is support and ELSVALS is nonzero store the possible else values
+ in the vector it points to. */
opt_machine_mode
-get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
+get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
+ vec<int> *elsvals)
{
gcc_assert (VECTOR_MODE_P (mode));
/* Check if length in lanes supported for this mode directly. */
- if (target_supports_len_load_store_p (mode, is_load, ifn))
+ if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals))
return mode;
/* Check if length in bytes supported for same vector size VnQI. */
machine_mode vmode;
poly_uint64 nunits = GET_MODE_SIZE (mode);
if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
- && target_supports_len_load_store_p (vmode, is_load, ifn))
+ && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals))
return vmode;
return opt_machine_mode ();
#include "vec-perm-indices.h"
#include "internal-fn.h"
#include "gimple-fold.h"
+#include "optabs-query.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
- COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
+ COUNT vectors of type VECTYPE. NAME is the name of OPTAB.
+
+ If it is implemented and ELSVALS is nonzero store the possible else
+ values in the vector it points to. */
static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
- tree vectype, unsigned HOST_WIDE_INT count)
+ tree vectype, unsigned HOST_WIDE_INT count,
+ vec<int> *elsvals = nullptr)
{
machine_mode mode, array_mode;
bool limit_p;
}
}
- if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
+ enum insn_code icode;
+ if ((icode = convert_optab_handler (optab, array_mode, mode))
+ == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
- GET_MODE_NAME (mode));
+ "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
+ GET_MODE_NAME (mode));
+
+ if (elsvals)
+ get_supported_else_vals (icode,
+ internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
+ *elsvals);
return true;
}
be multiplied *after* it has been converted to address width.
Return true if the function is supported, storing the function id in
- *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
+ *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
+
+ If we can use gather and store the possible else values in ELSVALS. */
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree vectype, tree memory_type, tree offset_type,
int scale, internal_fn *ifn_out,
- tree *offset_vectype_out)
+ tree *offset_vectype_out, vec<int> *elsvals)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
/* Test whether the target supports this combination. */
if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_vectype, scale))
+ offset_vectype, scale,
+ elsvals))
{
*ifn_out = ifn;
*offset_vectype_out = offset_vectype;
&& internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
memory_type,
offset_vectype,
- scale))
+ scale, elsvals))
{
*ifn_out = alt_ifn;
*offset_vectype_out = offset_vectype;
}
else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
memory_type,
- offset_vectype, scale))
+ offset_vectype, scale,
+ elsvals))
{
*ifn_out = alt_ifn2;
*offset_vectype_out = offset_vectype;
}
/* Return true if a non-affine read or write in STMT_INFO is suitable for a
- gather load or scatter store. Describe the operation in *INFO if so. */
+ gather load or scatter store. Describe the operation in *INFO if so.
+ If it is suitable and ELSVALS is nonzero store the supported else values
+ in the vector it points to. */
bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
- gather_scatter_info *info)
+ gather_scatter_info *info, vec<int> *elsvals)
{
HOST_WIDE_INT scale = 1;
poly_int64 pbitpos, pbitsize;
if (internal_gather_scatter_fn_p (ifn))
{
vect_describe_gather_scatter_call (stmt_info, info);
+
+ /* In pattern recog we simply used a ZERO else value that
+ we need to correct here. To that end just re-use the
+ (already successful) check if we support a gather IFN
+ and have it populate the else values. */
+ if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
+ supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
return true;
}
masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
/* True if we should aim to use internal functions rather than
built-in functions. */
bool use_ifn_p = (DR_IS_READ (dr)
- ? supports_vec_gather_load_p (TYPE_MODE (vectype))
+ ? supports_vec_gather_load_p (TYPE_MODE (vectype),
+ elsvals)
: supports_vec_scatter_store_p (TYPE_MODE (vectype)));
base = DR_REF (dr);
masked_p, vectype, memory_type,
signed_char_type_node,
new_scale, &ifn,
- &offset_vectype)
+ &offset_vectype,
+ elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
- &offset_vectype))
+ &offset_vectype,
+ elsvals))
break;
scale = new_scale;
off = op0;
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
- &offset_vectype))
+ &offset_vectype, elsvals))
break;
if (TYPE_PRECISION (TREE_TYPE (op0))
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offtype, scale,
- &ifn, &offset_vectype))
+ &ifn, &offset_vectype, elsvals))
ifn = IFN_LAST;
decl = NULL_TREE;
}
}
/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
- of type VECTYPE. MASKED_P says whether the masked form is needed. */
+ of type VECTYPE. MASKED_P says whether the masked form is needed.
+ If it is available and ELSVALS is nonzero store the possible else values
+ in the vector it points to. */
internal_fn
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
- bool masked_p)
+ bool masked_p, vec<int> *elsvals)
{
if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
vec_mask_len_load_lanes_optab, vectype,
- count))
+ count, elsvals))
return IFN_MASK_LEN_LOAD_LANES;
else if (masked_p)
{
if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
vec_mask_load_lanes_optab, vectype,
- count))
+ count, elsvals))
return IFN_MASK_LOAD_LANES;
}
else
{
if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
- vectype, count))
+ vectype, count, elsvals))
return IFN_LOAD_LANES;
}
return IFN_LAST;
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"
+#include "tree-dfa.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
/* ARRAY is an array of vectors created by create_vector_array.
Return an SSA_NAME for the vector in index N. The reference
is part of the vectorization of STMT_INFO and the vector is associated
- with scalar destination SCALAR_DEST. */
+ with scalar destination SCALAR_DEST.
+ If we need to ensure that inactive elements are set to zero,
+ NEED_ZEROING is true and MASK contains the loop mask to be used. */
static tree
read_vector_array (vec_info *vinfo,
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
+ tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
+ bool need_zeroing, tree mask)
{
- tree vect_type, vect, vect_name, array_ref;
+ tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
gimple *new_stmt;
gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
vect_type = TREE_TYPE (TREE_TYPE (array));
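+ /* TMP holds the plain array element; if inactive lanes need
+ zeroing we blend it with zero below before producing the final
+ vector. */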
+ tmp = vect_create_destination_var (scalar_dest, vect_type);
vect = vect_create_destination_var (scalar_dest, vect_type);
array_ref = build4 (ARRAY_REF, vect_type, array,
build_int_cst (size_type_node, n),
NULL_TREE, NULL_TREE);
- new_stmt = gimple_build_assign (vect, array_ref);
- vect_name = make_ssa_name (vect, new_stmt);
- gimple_assign_set_lhs (new_stmt, vect_name);
+ new_stmt = gimple_build_assign (tmp, array_ref);
+ tmp_name = make_ssa_name (vect, new_stmt);
+ gimple_assign_set_lhs (new_stmt, tmp_name);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (need_zeroing)
+ {
+ tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ vect_type);
+ vect_name = make_ssa_name (vect, new_stmt);
+ new_stmt
+ = gimple_build_assign (vect_name, VEC_COND_EXPR,
+ mask, tmp_name, vec_els);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ else
+ vect_name = tmp_name;
+
return vect_name;
}
if (mask_index >= 0
&& use == gimple_call_arg (call, mask_index))
return true;
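+ /* Like the mask, the else operand of a masked load is not used
+ to index the memory access. */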
+ int els_index = internal_fn_else_index (ifn);
+ if (els_index >= 0
+ && use == gimple_call_arg (call, els_index))
+ return true;
int stored_value_index = internal_fn_stored_value_index (ifn);
if (stored_value_index >= 0
&& use == gimple_call_arg (call, stored_value_index))
vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
gcc_assert (vector_type);
- tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
+ /* A masked load can have a default SSA definition as else operand.
+ We should "vectorize" this instead of creating a duplicate from the
+ scalar default. */
+ tree vop;
+ if (TREE_CODE (op) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (op)
+ && VAR_P (SSA_NAME_VAR (op)))
+ vop = get_or_create_ssa_default_def (cfun,
+ create_tmp_var (vector_type));
+ else
+ vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
while (ncopies--)
vec_oprnds->quick_push (vop);
}
Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
vectors is not supported, otherwise record the required rgroup control
- types. */
+ types.
+
+ If partial vectors can be used and ELSVALS is nonzero the supported
+ else values will be added to the vector ELSVALS points to. */
static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
vect_memory_access_type
memory_access_type,
gather_scatter_info *gs_info,
- tree scalar_mask)
+ tree scalar_mask,
+ vec<int> *elsvals = nullptr)
{
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
if (slp_node)
nvectors /= group_size;
internal_fn ifn
- = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
+ = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
+ elsvals)
: vect_store_lanes_supported (vectype, group_size, true));
if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
gs_info->memory_type,
gs_info->offset_vectype,
- gs_info->scale))
+ gs_info->scale,
+ elsvals))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
gs_info->offset_vectype,
- gs_info->scale))
+ gs_info->scale,
+ elsvals))
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
scalar_mask);
else
machine_mode mask_mode;
machine_mode vmode;
bool using_partial_vectors_p = false;
- if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
+ if (get_len_load_store_mode
+ (vecmode, is_load, nullptr, elsvals).exists (&vmode))
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
using_partial_vectors_p = true;
}
else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
- && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
+ && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
+ elsvals))
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
without loss of precision, where X is STMT_INFO's DR_STEP.
Return true if this is possible, describing the gather load or scatter
- store in GS_INFO. MASKED_P is true if the load or store is conditional. */
+ store in GS_INFO. MASKED_P is true if the load or store is conditional.
+
+ If we can use gather/scatter and ELSVALS is nonzero the supported
+ else values will be stored in the vector ELSVALS points to. */
static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
loop_vec_info loop_vinfo, bool masked_p,
- gather_scatter_info *gs_info)
+ gather_scatter_info *gs_info,
+ vec<int> *elsvals)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
data_reference *dr = dr_info->dr;
tree memory_type = TREE_TYPE (DR_REF (dr));
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offset_type, scale,
- &gs_info->ifn, &gs_info->offset_vectype)
+ &gs_info->ifn, &gs_info->offset_vectype,
+ elsvals)
|| gs_info->ifn == IFN_LAST)
continue;
vectorize STMT_INFO, which is a grouped or strided load or store.
MASKED_P is true if load or store is conditional. When returning
true, fill in GS_INFO with the information required to perform the
- operation. */
+ operation.
+
+ If we can use gather/scatter and ELSVALS is nonzero the supported
+ else values will be stored in the vector ELSVALS points to. */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
loop_vec_info loop_vinfo, bool masked_p,
- gather_scatter_info *gs_info)
+ gather_scatter_info *gs_info,
+ vec<int> *elsvals)
{
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
- masked_p, gs_info);
+ masked_p, gs_info, elsvals);
tree old_offset_type = TREE_TYPE (gs_info->offset);
tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
For stores, the statements in the group are all consecutive
and there is no gap at the end. For loads, the statements in the
group might not be consecutive; there can be gaps between statements
- as well as at the end. */
+ as well as at the end.
+
+ If we can use gather/scatter and ELSVALS is nonzero the supported
+ else values will be stored in the vector ELSVALS points to. */
static bool
get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dr_alignment_support *alignment_support_scheme,
int *misalignment,
gather_scatter_info *gs_info,
- internal_fn *lanes_ifn)
+ internal_fn *lanes_ifn,
+ vec<int> *elsvals)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
else if (slp_node->ldst_lanes
&& (*lanes_ifn
= (vls_type == VLS_LOAD
- ? vect_load_lanes_supported (vectype, group_size, masked_p)
+ ? vect_load_lanes_supported (vectype, group_size,
+ masked_p, elsvals)
: vect_store_lanes_supported (vectype, group_size,
masked_p))) != IFN_LAST)
*memory_access_type = VMAT_LOAD_STORE_LANES;
/* Otherwise try using LOAD/STORE_LANES. */
*lanes_ifn
= vls_type == VLS_LOAD
- ? vect_load_lanes_supported (vectype, group_size, masked_p)
+ ? vect_load_lanes_supported (vectype, group_size, masked_p,
+ elsvals)
: vect_store_lanes_supported (vectype, group_size,
masked_p);
if (*lanes_ifn != IFN_LAST)
&& (!slp_node || SLP_TREE_LANES (slp_node) == 1)
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
- masked_p, gs_info))
+ masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
if (*memory_access_type == VMAT_GATHER_SCATTER
SLP says whether we're performing SLP rather than loop vectorization.
MASKED_P is true if the statement is conditional on a vectorized mask.
VECTYPE is the vector type that the vectorized statements will use.
- NCOPIES is the number of vector statements that will be needed. */
+ NCOPIES is the number of vector statements that will be needed.
+
+ If ELSVALS is nonzero the supported else values will be stored in the
+ vector ELSVALS points to. */
static bool
get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dr_alignment_support *alignment_support_scheme,
int *misalignment,
gather_scatter_info *gs_info,
- internal_fn *lanes_ifn)
+ internal_fn *lanes_ifn,
+ vec<int> *elsvals = nullptr)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
*memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+ elsvals))
gcc_unreachable ();
/* When using internal functions, we rely on pattern recognition
to convert the type of the offset to the type that the target
masked_p,
vls_type, memory_access_type, poffset,
alignment_support_scheme,
- misalignment, gs_info, lanes_ifn))
+ misalignment, gs_info, lanes_ifn,
+ elsvals))
return false;
}
else if (STMT_VINFO_STRIDED_P (stmt_info))
gcc_assert (!slp_node);
if (loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
- masked_p, gs_info))
+ masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
else
*memory_access_type = VMAT_ELEMENTWISE;
return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
+/* Build and return the else operand corresponding to the else-value
+ constant ELSVAL, using type TYPE. */
+
+tree
+vect_get_mask_load_else (int elsval, tree type)
+{
+ tree els;
+ if (elsval == MASK_LOAD_ELSE_UNDEFINED)
+ {
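+ /* Represent an undefined else value by the default definition of
+ a fresh temporary. */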
+ tree tmp = create_tmp_var (type);
+ /* No need to warn about anything. */
+ TREE_NO_WARNING (tmp) = 1;
+ els = get_or_create_ssa_default_def (cfun, tmp);
+ }
+ else if (elsval == MASK_LOAD_ELSE_M1)
+ els = build_minus_one_cst (type);
+ else if (elsval == MASK_LOAD_ELSE_ZERO)
+ els = build_zero_cst (type);
+ else
+ gcc_unreachable ();
+
+ return els;
+}
+
/* Build a gather load call while vectorizing STMT_INFO. Insert new
instructions before GSI and add them to VEC_STMT. GS_INFO describes
the gather load operation. If the load is conditional, MASK is the
gather_scatter_info gs_info;
tree ref_type;
enum vect_def_type mask_dt = vect_unknown_def_type;
+ enum vect_def_type els_dt = vect_unknown_def_type;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
return false;
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+ tree els = NULL_TREE;
+ tree els_vectype = NULL_TREE;
+
int mask_index = -1;
+ int els_index = -1;
slp_tree slp_op = NULL;
+ slp_tree els_op = NULL;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
scalar_dest = gimple_assign_lhs (assign);
&& !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
&mask, &slp_op, &mask_dt, &mask_vectype))
return false;
+
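+ /* Analyze the else operand of a masked load like any other
+ operand. */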
+ els_index = internal_fn_else_index (ifn);
+ if (els_index >= 0 && slp_node)
+ els_index = vect_slp_child_index_for_operand
+ (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+ if (els_index >= 0
+ && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
+ &els, &els_op, &els_dt, &els_vectype))
+ return false;
}
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int misalignment;
poly_int64 poffset;
internal_fn lanes_ifn;
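+ /* Else values the target supports for this load, the else value we
+ will use, and whether inactive lanes must be zeroed explicitly
+ afterwards. */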
+ auto_vec<int> elsvals;
+ int maskload_elsval = 0;
+ bool need_zeroing = false;
if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
ncopies, &memory_access_type, &poffset,
&alignment_support_scheme, &misalignment, &gs_info,
- &lanes_ifn))
+ &lanes_ifn, &elsvals))
return false;
+
+ /* We might need to explicitly zero inactive elements if there are
+ padding bits in the type that might leak otherwise.
+ Refer to PR115336. */
+ tree scalar_type = TREE_TYPE (scalar_dest);
+ bool type_mode_padding_p
+ = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
+
/* ??? The following checks should really be part of
get_group_load_store_type. */
if (slp
machine_mode vec_mode = TYPE_MODE (vectype);
if (!VECTOR_MODE_P (vec_mode)
|| !can_vec_mask_load_store_p (vec_mode,
- TYPE_MODE (mask_vectype), true))
+ TYPE_MODE (mask_vectype),
+ true, NULL, &elsvals))
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
VLS_LOAD, group_size,
memory_access_type, &gs_info,
- mask);
+ mask, &elsvals);
if (dump_enabled_p ()
&& memory_access_type != VMAT_ELEMENTWISE
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
}
+ else
+ {
+ /* Here just get the else values. */
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
+ VLS_LOAD, group_size,
+ memory_access_type, &gs_info,
+ mask, &elsvals);
+ }
+
+ /* If the type needs padding we must zero inactive elements.
+ Check if we can do that with a VEC_COND_EXPR and store the
+ elsval we choose in MASKLOAD_ELSVAL. */
+ if (elsvals.length ()
+ && type_mode_padding_p
+ && !elsvals.contains (MASK_LOAD_ELSE_ZERO)
+ && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot zero inactive elements.\n");
+ return false;
+ }
+
+ /* For now just use the first available else value.
+ get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
+ select it here if it is supported. */
+ if (elsvals.length ())
+ maskload_elsval = *elsvals.begin ();
if (!slp)
gcc_assert (memory_access_type
}
tree vec_mask = NULL_TREE;
+ tree vec_els = NULL_TREE;
if (memory_access_type == VMAT_LOAD_STORE_LANES)
{
gcc_assert (alignment_support_scheme == dr_aligned
}
}
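+ /* Provide the else operand for the masked load-lanes call and
+ record whether the chosen else value still requires explicit
+ zeroing of the inactive lanes. */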
+ if (final_mask)
+ {
+ vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ }
+
gcall *call;
if (final_len && final_mask)
{
VEC_MASK, LEN, BIAS). */
unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
tree alias_ptr = build_int_cst (ref_type, align);
- call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
+ call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
dataref_ptr, alias_ptr,
- final_mask, final_len, bias);
+ final_mask, vec_els,
+ final_len, bias);
}
else if (final_mask)
{
VEC_MASK). */
unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
tree alias_ptr = build_int_cst (ref_type, align);
- call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
+ call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
dataref_ptr, alias_ptr,
- final_mask);
+ final_mask, vec_els);
}
else
{
for (unsigned i = 0; i < group_size; i++)
{
new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
- vec_array, i);
+ vec_array, i, need_zeroing,
+ final_mask);
if (slp)
slp_node->push_vec_def (new_temp);
else
}
}
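+ /* As above, set up the else operand for the gather load and note
+ whether explicit zeroing of the inactive lanes is still needed. */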
+ if (final_mask)
+ {
+ vec_els = vect_get_mask_load_else
+ (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ }
+
gcall *call;
if (final_len && final_mask)
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (
- IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
- scale, zero, final_mask, final_len, bias);
+ IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset,
+ scale, zero, final_mask, vec_els, final_len, bias);
else
/* Non-vector offset indicates that prefer to take
MASK_LEN_STRIDED_LOAD instead of the
MASK_LEN_GATHER_LOAD with direct stride arg. */
call = gimple_build_call_internal (
- IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
- zero, final_mask, final_len, bias);
+ IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset,
+ zero, final_mask, vec_els, final_len, bias);
}
else if (final_mask)
- call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
- dataref_ptr, vec_offset,
- scale, zero, final_mask);
+ call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
+ 6, dataref_ptr,
+ vec_offset, scale,
+ zero, final_mask,
+ vec_els);
else
call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
dataref_ptr, vec_offset,
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
new_stmt = gimple_build_assign (vec_dest, data_ref);
}
- new_temp = make_ssa_name (vec_dest, new_stmt);
+ new_temp = need_zeroing
+ ? make_ssa_name (vectype)
+ : make_ssa_name (vec_dest, new_stmt);
gimple_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ /* If we need to explicitly zero inactive elements emit a
+ VEC_COND_EXPR that does so. */
+ if (need_zeroing)
+ {
+ vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ vectype);
+
+ tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+ new_stmt
+ = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+ final_mask, new_temp, vec_els);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+ gsi);
+ new_temp = new_temp2;
+ }
+
/* Store vector loads in the corresponding SLP_NODE. */
if (slp)
slp_node->push_vec_def (new_stmt);
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
tree bias = NULL_TREE;
+
if (!costing_p)
{
if (mask)
bias = build_int_cst (intQI_type_node, biasval);
}
+ tree vec_els;
+
if (final_len)
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call;
if (partial_ifn == IFN_MASK_LEN_LOAD)
- call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
- dataref_ptr, ptr,
- final_mask, final_len,
- bias);
+ {
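+ /* Build the else operand and note whether the inactive lanes
+ still need to be zeroed after the load. */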
+ vec_els = vect_get_mask_load_else
+ (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
+ 6, dataref_ptr, ptr,
+ final_mask, vec_els,
+ final_len, bias);
+ }
else
call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
dataref_ptr, ptr,
else if (final_mask)
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
- gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
+ vec_els = vect_get_mask_load_else
+ (maskload_elsval, vectype);
+ if (type_mode_padding_p
+ && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+ need_zeroing = true;
+ gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
dataref_ptr, ptr,
- final_mask);
+ final_mask,
+ vec_els);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
new_stmt = gimple_build_assign (vec_dest, data_ref);
}
- new_temp = make_ssa_name (vec_dest, new_stmt);
+
+ new_temp = need_zeroing
+ ? make_ssa_name (vectype)
+ : make_ssa_name (vec_dest, new_stmt);
gimple_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+ /* If we need to explicitly zero inactive elements emit a
+ VEC_COND_EXPR that does so. */
+ if (need_zeroing)
+ {
+ vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ vectype);
+
+ tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+ new_stmt
+ = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+ final_mask, new_temp, vec_els);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+ gsi);
+ new_temp = new_temp2;
+ }
}
/* 3. Handle explicit realignment if necessary/supported.