return opt_result::success ();
}
+/* Structure to hold information about a supported gather/scatter
+ configuration. */
+struct gather_scatter_config
+{
+ /* The internal function to use. */
+ internal_fn ifn;
+ /* The vector type of the offset. */
+ tree offset_vectype;
+ /* The else values the target supports for this configuration. */
+ vec<int> elsvals;
+};
+
+/* Determine which gather/scatter IFN is supported for the given parameters.
+ IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
+ (and their scatter-store counterparts) are mutually exclusive, so we
+ only need to find one. Return the supported IFN, or IFN_LAST if none
+ is supported. */
+
+static internal_fn
+vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
+ tree vectype, tree memory_type,
+ tree offset_vectype, int scale,
+ vec<int> *elsvals)
+{
+ /* Work out which functions to try. */
+ internal_fn ifn, alt_ifn, alt_ifn2;
+ if (read_p)
+ {
+ ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
+ alt_ifn = IFN_MASK_GATHER_LOAD;
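+ /* When the target supports MASK_LEN_GATHER_LOAD we always use it,
+ regardless of whether len and mask are valid or not. */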
+ alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
+ }
+ else
+ {
+ ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
+ alt_ifn = IFN_MASK_SCATTER_STORE;
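+ /* Likewise, when the target supports MASK_LEN_SCATTER_STORE we always
+ use it, regardless of whether len and mask are valid or not. */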
+ alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
+ }
+
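+ /* Without a vector type for the offset we cannot check support. */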
+ if (!offset_vectype)
+ return IFN_LAST;
+
+ if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+ offset_vectype, scale, elsvals))
+ return ifn;
+ if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
+ offset_vectype, scale, elsvals))
+ return alt_ifn;
+ if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
+ offset_vectype, scale, elsvals))
+ return alt_ifn2;
+
+ return IFN_LAST;
+}
+
+/* Collect all supported offset vector types for a gather load or scatter
+ store. READ_P is true for loads and false for stores. MASKED_P is true
+ if the load or store is conditional. VECTYPE is the data vector type.
+ MEMORY_TYPE is the type of the memory elements being loaded or stored,
+ and OFFSET_TYPE is the type of the offset.
+ SCALE is the amount by which the offset should be multiplied.
+
+ Return a vector of all configurations the target supports (which can
+ be none). */
+
+static auto_vec<gather_scatter_config>
+vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
+ tree vectype, tree memory_type,
+ tree offset_type, int scale)
+{
+ auto_vec<gather_scatter_config> configs;
+
+ auto_vec<tree, 8> offset_types_to_try;
+
+ /* Try all power-of-two precisions from the offset type's precision up
+ to POINTER_SIZE; bits of an offset beyond the pointer width cannot
+ contribute to addressing anyway. */
+ for (unsigned int bits = TYPE_PRECISION (offset_type);
+ bits <= POINTER_SIZE;
+ bits *= 2)
+ {
+ /* Signed variant. */
+ offset_types_to_try.safe_push
+ (build_nonstandard_integer_type (bits, 0));
+ /* Unsigned variant. */
+ offset_types_to_try.safe_push
+ (build_nonstandard_integer_type (bits, 1));
+ }
+
+ /* Once we have found the IFN that works for one offset type we know it
+ is the same for all other offset types, so the remaining checks need
+ only use that IFN. We might still have to try several offset types
+ before finding the supported IFN, though, since the support check is
+ offset-type-specific. */
+ internal_fn ifn = IFN_LAST;
+
+ /* Try each offset type. */
+ for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
+ {
+ tree offset_type = offset_types_to_try[i];
+ tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+ if (!offset_vectype)
+ continue;
+
+ vec<int> elsvals = vNULL;
+
+ /* If we haven't determined which IFN is supported yet, try all three
+ to find which one the target supports. */
+ if (ifn == IFN_LAST)
+ {
+ ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
+ vectype, memory_type,
+ offset_vectype, scale, &elsvals);
+ if (ifn != IFN_LAST)
+ {
+ /* Found which IFN is supported. Save this configuration. */
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
+ }
+ else
+ {
+ /* We already know which IFN is supported, just check if this
+ offset type works with it. */
+ if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+ offset_vectype, scale,
+ &elsvals))
+ {
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
+ }
+ }
+
+ return configs;
+}
+
/* Check whether we can use an internal function for a gather load
or scatter store. READ_P is true for loads and false for stores.
MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
Return true if the function is supported, storing the function id in
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
+ If the target only supports an offset vector type whose signedness
+ differs from OFFSET_TYPE's, store that type in *SUPPORTED_OFFSET_VECTYPE
+ (NULL_TREE otherwise).
- If we can use gather and store the possible else values in ELSVALS. */
+ If we can use gather/scatter and ELSVALS is nonzero, store the possible
+ else values in ELSVALS. */
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree vectype, tree memory_type, tree offset_type,
int scale, internal_fn *ifn_out,
- tree *offset_vectype_out, vec<int> *elsvals)
+ tree *offset_vectype_out,
+ tree *supported_offset_vectype,
+ vec<int> *elsvals)
{
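+ /* By default no sign-swapped offset vector type is needed. */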
+ *supported_offset_vectype = NULL_TREE;
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
if (element_bits != memory_bits)
memory elements. */
return false;
- /* Work out which function we need. */
- internal_fn ifn, alt_ifn, alt_ifn2;
- if (read_p)
- {
- ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
- alt_ifn = IFN_MASK_GATHER_LOAD;
- /* When target supports MASK_LEN_GATHER_LOAD, we always
- use MASK_LEN_GATHER_LOAD regardless whether len and
- mask are valid or not. */
- alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
- }
- else
- {
- ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
- alt_ifn = IFN_MASK_SCATTER_STORE;
- /* When target supports MASK_LEN_SCATTER_STORE, we always
- use MASK_LEN_SCATTER_STORE regardless whether len and
- mask are valid or not. */
- alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
- }
+ /* Get the original offset vector type for comparison. */
+ tree offset_vectype = VECTOR_TYPE_P (offset_type)
+ ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
+ if (!offset_vectype)
+ return false;
- for (;;)
- {
- tree offset_vectype;
- if (VECTOR_TYPE_P (offset_type))
- offset_vectype = offset_type;
- else
- {
- offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
- if (!offset_vectype)
- return false;
- }
+ offset_type = TREE_TYPE (offset_vectype);
- /* Test whether the target supports this combination. */
- if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_vectype, scale,
- elsvals))
- {
- *ifn_out = ifn;
- *offset_vectype_out = offset_vectype;
- return true;
- }
- else if (!masked_p
- && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
- memory_type,
- offset_vectype,
- scale, elsvals))
+ /* Get all supported configurations for this data vector type. */
+ auto_vec<gather_scatter_config> configs
+ = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
+ memory_type, offset_type, scale);
+
+ if (configs.is_empty ())
+ return false;
+
+ /* First, try to find a configuration whose offset vectype has the same
+ signedness as the requested one; then no explicit conversion of the
+ offset is needed. */
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
{
- *ifn_out = alt_ifn;
- *offset_vectype_out = offset_vectype;
+ *ifn_out = configs[i].ifn;
+ *offset_vectype_out = configs[i].offset_vectype;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
return true;
}
- else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
- memory_type,
- offset_vectype, scale,
- elsvals))
+ }
+
+ /* No direct match, so try to find a sign-swapped offset vectype with
+ sufficient precision: an unsigned offset of precision P is losslessly
+ representable in a signed type of precision 2 * P, while a signed
+ offset requires a pointer-sized unsigned type, relying on address
+ arithmetic wrapping around modulo 2^POINTER_SIZE. */
+ unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
+ unsigned int needed_precision
+ = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
+ needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
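+ /* Receives the conversion's tree code; we only care whether the
+ conversion is supported at all. */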
+ enum tree_code tmp;
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ unsigned int precision
+ = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+ if (precision >= needed_precision
+ && (supportable_convert_operation (CONVERT_EXPR,
+ configs[i].offset_vectype,
+ offset_vectype, &tmp)
+ || (needed_precision == offset_precision
+ && tree_nop_conversion_p (configs[i].offset_vectype,
+ offset_vectype))))
{
- *ifn_out = alt_ifn2;
+ *ifn_out = configs[i].ifn;
*offset_vectype_out = offset_vectype;
+ *supported_offset_vectype = configs[i].offset_vectype;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
return true;
}
-
- /* For fixed offset vector type we're done. */
- if (VECTOR_TYPE_P (offset_type))
- return false;
-
- if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
- && TYPE_PRECISION (offset_type) >= element_bits)
- return false;
-
- /* Try a larger offset vector type. */
- offset_type = build_nonstandard_integer_type
- (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
}
+
+ return false;
}
/* STMT_INFO is a call to an internal gather load or scatter store function.
base = fold_convert (sizetype, base);
base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
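+ /* Receives a possibly sign-swapped offset vector type; it is not used
+ in this analysis. */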
+ tree tmp_offset_vectype;
/* OFF at this point may be either a SSA_NAME or some tree expression
from get_inner_reference. Try to peel off loop invariants from it
signed_char_type_node,
new_scale, &ifn,
&offset_vectype,
+ &tmp_offset_vectype,
elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
&offset_vectype,
+ &tmp_offset_vectype,
elsvals))
break;
scale = new_scale;
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
- &offset_vectype, elsvals))
+ &offset_vectype,
+ &tmp_offset_vectype,
+ elsvals))
break;
if (TYPE_PRECISION (TREE_TYPE (op0))
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offtype, scale,
- &ifn, &offset_vectype, elsvals))
+ &ifn, &offset_vectype,
+ &tmp_offset_vectype,
+ elsvals))
ifn = IFN_LAST;
decl = NULL_TREE;
}
: ls->strided_offset_vectype);
tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
int scale = SLP_TREE_GS_SCALE (slp_node);
+
+ /* The following "supported" checks merely verify what we established in
+ get_load_store_type and do not try different offset types.
+ OFF_VECTYPE must therefore be a supported offset type; if we chose a
+ different one there, use that instead. */
+ if (ls->supported_offset_vectype)
+ off_vectype = ls->supported_offset_vectype;
+
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
memory_type,
off_vectype, scale,
/* See whether the target supports the operation with an offset
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
+ tree tmp_offset_vectype;
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offset_type, scale,
&gs_info->ifn, &gs_info->offset_vectype,
- elsvals)
+ &tmp_offset_vectype, elsvals)
|| gs_info->ifn == IFN_LAST)
continue;
type must exist) so it is possible that even though a gather/scatter is
not available we still have a strided load/store. */
bool ok = false;
+ tree tmp_vectype;
if (vect_gather_scatter_fn_p
(loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
- &offset_vectype, elsvals))
+ &offset_vectype, &tmp_vectype, elsvals))
ok = true;
else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
elsvals))
tree *ls_type = &ls->ls_type;
bool *slp_perm = &ls->slp_perm;
unsigned *n_perms = &ls->n_perms;
+ tree *supported_offset_vectype = &ls->supported_offset_vectype;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
tree tem;
if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
- masked_p, vectype,
- memory_type,
+ masked_p, vectype, memory_type,
offset_vectype, scale,
&ls->gs.ifn, &tem,
- elsvals))
- *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ supported_offset_vectype, elsvals))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "gather/scatter with required "
+ "offset type "
+ "%T and offset scale %d.\n",
+ offset_vectype, scale);
+ if (*supported_offset_vectype)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " target supports offset type %T.\n",
+ *supported_offset_vectype);
+ }
+ *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ }
else if (vls_type == VLS_LOAD
? (targetm.vectorize.builtin_gather
&& (ls->gs.decl
masked_p, &gs_info, elsvals,
group_size, single_element_p))
{
+ /* vect_use_strided_gather_scatters_p does not save the actually
+ supported offset vector type, so re-query it here. We need it
+ later in check_load_store_for_partial_vectors, which only checks
+ whether the given internal function is supported (in order to choose
+ between the IFN, LEGACY, and EMULATED flavors of gather/scatter)
+ and does not redo the full analysis. */
+ tree tmp;
+ gcc_assert (vect_gather_scatter_fn_p
+ (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
+ gs_info.memory_type, TREE_TYPE (gs_info.offset),
+ gs_info.scale, &gs_info.ifn,
+ &tmp, supported_offset_vectype, elsvals));
+
SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
SLP_TREE_GS_BASE (slp_node) = error_mark_node;
ls->gs.ifn = gs_info.ifn;
{
if (costing_p)
{
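+ /* Account for the conversion of the offset to the supported
+ offset vector type. */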
+ if (ls.supported_offset_vectype)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
+
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
+= record_stmt_cost (cost_vec, cnunits, scalar_store,
vec_offset = vec_offsets[j];
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
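+ /* A non-vector offset indicates a strided access. */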
+ bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+ /* Perform the offset conversion if necessary. */
+ if (!strided && ls.supported_offset_vectype)
+ {
+ gimple_seq stmts = NULL;
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ }
if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
{
{
if (costing_p)
{
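+ /* Account for the conversion of the offset to the supported
+ offset vector type. */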
+ if (ls.supported_offset_vectype)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
+
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
= record_stmt_cost (cost_vec, cnunits, scalar_load,
vec_offset = vec_offsets[i];
tree zero = build_zero_cst (vectype);
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
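+ /* A non-vector offset indicates a strided access. */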
+ bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+ /* Perform the offset conversion if necessary. */
+ if (!strided && ls.supported_offset_vectype)
+ {
+ gimple_seq stmts = NULL;
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ }
if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
{