{
internal_fn ifn;
tree offset_vectype;
+ int scale;
vec<int> elsvals;
};
if (!offset_vectype)
continue;
- vec<int> elsvals = vNULL;
+ /* Try multiple scale values.  Start with the exact match, then try
+    smaller common scales that a target might support.  */
+ int scales_to_try[] = {scale, 1, 2, 4, 8};
- /* If we haven't determined which IFN is supported yet, try all three
- to find which one the target supports. */
- if (ifn == IFN_LAST)
+ for (unsigned int j = 0; j < ARRAY_SIZE (scales_to_try); j++)
{
- ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
- vectype, memory_type,
- offset_vectype, scale, &elsvals);
- if (ifn != IFN_LAST)
+ int try_scale = scales_to_try[j];
+
+ /* Skip scales greater than or equal to the requested scale; the
+    exact match was already tried as the first element.  */
+ if (j > 0 && try_scale >= scale)
+ continue;
+
+ /* Skip if requested scale is not a multiple of this scale. */
+ if (j > 0 && scale % try_scale != 0)
+ continue;
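+
+ /* E.g. for a requested scale of 8 this tries 8, 1, 2 and 4;
+    for a requested scale of 4 it tries 4, 1 and 2.  */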
+
+ vec<int> elsvals = vNULL;
+
+ /* If we haven't determined which IFN is supported yet, try all three
+ to find which one the target supports. */
+ if (ifn == IFN_LAST)
{
- /* Found which IFN is supported. Save this configuration. */
- gather_scatter_config config;
- config.ifn = ifn;
- config.offset_vectype = offset_vectype;
- config.elsvals = elsvals;
- configs.safe_push (config);
+ ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
+ vectype, memory_type,
+ offset_vectype, try_scale,
+ &elsvals);
+ if (ifn != IFN_LAST)
+ {
+ /* Found which IFN is supported. Save this configuration. */
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.scale = try_scale;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
}
- }
- else
- {
- /* We already know which IFN is supported, just check if this
- offset type works with it. */
- if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_vectype, scale,
- &elsvals))
+ else
{
- gather_scatter_config config;
- config.ifn = ifn;
- config.offset_vectype = offset_vectype;
- config.elsvals = elsvals;
- configs.safe_push (config);
+ /* We already know which IFN is supported; just check whether this
+    offset type and scale work with it.  */
+ if (internal_gather_scatter_fn_supported_p (ifn, vectype,
+ memory_type,
+ offset_vectype,
+ try_scale,
+ &elsvals))
+ {
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.scale = try_scale;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
}
}
}
base address. If OFFSET_TYPE is scalar the function chooses an
appropriate vector type for it. SCALE is the amount by which the
offset should be multiplied *after* it has been converted to address width.
+ If the target does not support the requested SCALE, SUPPORTED_SCALE
+ contains the smaller scale that is actually supported; the offsets
+ then need to be multiplied by SCALE / SUPPORTED_SCALE.
+ Otherwise SUPPORTED_SCALE is 0.
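+ For example, a request for SCALE 8 on a target that only supports
+ a scale of 4 yields SUPPORTED_SCALE 4 and requires the offsets to
+ be multiplied by 2.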
Return true if the function is supported, storing the function id in
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree vectype, tree memory_type, tree offset_type,
- int scale, internal_fn *ifn_out,
+ int scale, int *supported_scale,
+ internal_fn *ifn_out,
tree *offset_vectype_out,
tree *supported_offset_vectype,
vec<int> *elsvals)
{
*supported_offset_vectype = NULL_TREE;
+ *supported_scale = 0;
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
if (element_bits != memory_bits)
if (configs.is_empty ())
return false;
- /* First, try to find a configuration that matches our offset type
- (no conversion needed). */
+ /* Selection priority:
+ 1 - Exact scale match + offset type match
+ 2 - Exact scale match + sign-swapped offset
+ 3 - Smaller scale + offset type match
+ 4 - Smaller scale + sign-swapped offset
+ Within each category, prefer smaller offset types. */
+
+ /* First pass: exact scale match with no conversion. */
for (unsigned int i = 0; i < configs.length (); i++)
{
- if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
+ if (configs[i].scale == scale
+ && TYPE_SIGN (configs[i].offset_vectype)
+ == TYPE_SIGN (offset_vectype))
{
*ifn_out = configs[i].ifn;
*offset_vectype_out = configs[i].offset_vectype;
}
}
- /* No direct match. This means we try to find a sign-swapped offset
- vectype. */
+ /* No direct match.  This means we try to find either
+    - a sign-swapped offset vectype,
+    - a smaller scale and a 2x larger offset type, or
+    - a smaller scale and a larger sign-swapped offset vectype.  */
unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
unsigned int needed_precision
= TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+ /* Second pass: exact scale match but with a sign-swapped offset
+    vectype.  */
enum tree_code tmp;
for (unsigned int i = 0; i < configs.length (); i++)
{
unsigned int precision
= TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
- if (precision >= needed_precision
+ if (configs[i].scale == scale
+ && precision >= needed_precision
+ && (supportable_convert_operation (CONVERT_EXPR,
+ configs[i].offset_vectype,
+ offset_vectype, &tmp)
+ || (needed_precision == offset_precision
+ && tree_nop_conversion_p (configs[i].offset_vectype,
+ offset_vectype))))
+ {
+ *ifn_out = configs[i].ifn;
+ *offset_vectype_out = offset_vectype;
+ *supported_offset_vectype = configs[i].offset_vectype;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
+ return true;
+ }
+ }
+
+ /* Third pass: Try a smaller scale with the same signedness. */
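+ /* The offsets will have to be multiplied by the ratio of the
+    requested and the supported scale, so require an offset type
+    twice as wide as the original one to avoid overflowing it.  */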
+ needed_precision = offset_precision * 2;
+ needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ unsigned int precision
+ = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+ if (configs[i].scale < scale
+ && precision >= needed_precision
+ && (supportable_convert_operation (CONVERT_EXPR,
+ configs[i].offset_vectype,
+ offset_vectype, &tmp)
+ || (needed_precision == offset_precision
+ && tree_nop_conversion_p (configs[i].offset_vectype,
+ offset_vectype))))
+ {
+ *ifn_out = configs[i].ifn;
+ *offset_vectype_out = configs[i].offset_vectype;
+ *supported_scale = configs[i].scale;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
+ return true;
+ }
+ }
+
+ /* Fourth pass: Try a smaller scale and sign-swapped offset vectype. */
+ needed_precision
+ = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
+ needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ unsigned int precision
+ = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+ if (configs[i].scale < scale
+ && precision >= needed_precision
&& (supportable_convert_operation (CONVERT_EXPR,
configs[i].offset_vectype,
offset_vectype, &tmp)
*ifn_out = configs[i].ifn;
*offset_vectype_out = offset_vectype;
*supported_offset_vectype = configs[i].offset_vectype;
+ *supported_scale = configs[i].scale;
if (elsvals)
*elsvals = configs[i].elsvals;
return true;
base = fold_convert (sizetype, base);
base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+ int tmp_scale;
tree tmp_offset_vectype;
/* OFF at this point may be either a SSA_NAME or some tree expression
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
signed_char_type_node,
- new_scale, &ifn,
+ new_scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
- new_scale, &ifn,
+ new_scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals))
&& !POINTER_TYPE_P (TREE_TYPE (off))
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
- TREE_TYPE (off), scale, &ifn,
+ TREE_TYPE (off),
+ scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals))
if (use_ifn_p)
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offtype, scale,
+ vectype, memory_type, offtype,
+ scale, &tmp_scale,
&ifn, &offset_vectype,
&tmp_offset_vectype,
elsvals))
we chose a different one use this instead. */
if (ls->supported_offset_vectype)
off_vectype = ls->supported_offset_vectype;
+ /* Same for scale. */
+ if (ls->supported_scale)
+ scale = ls->supported_scale;
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
memory_type,
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
tree tmp_offset_vectype;
+ int tmp_scale;
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offset_type, scale,
+ vectype, memory_type, offset_type,
+ scale, &tmp_scale,
&gs_info->ifn, &gs_info->offset_vectype,
&tmp_offset_vectype, elsvals)
|| gs_info->ifn == IFN_LAST)
not available we still have a strided load/store. */
bool ok = false;
tree tmp_vectype;
+ int tmp_scale;
if (vect_gather_scatter_fn_p
(loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
- TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
+ TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
&offset_vectype, &tmp_vectype, elsvals))
ok = true;
else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
bool *slp_perm = &ls->slp_perm;
unsigned *n_perms = &ls->n_perms;
tree *supported_offset_vectype = &ls->supported_offset_vectype;
+ int *supported_scale = &ls->supported_scale;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
tree tem;
if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
masked_p, vectype, memory_type,
- offset_vectype, scale,
+ offset_vectype, scale, supported_scale,
&ls->gs.ifn, &tem,
supported_offset_vectype, elsvals))
{
dump_printf_loc (MSG_NOTE, vect_location,
" target supports offset type %T.\n",
*supported_offset_vectype);
+ if (*supported_scale)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " target supports offset scale %d.\n",
+ *supported_scale);
}
*memory_access_type = VMAT_GATHER_SCATTER_IFN;
}
gcc_assert (vect_gather_scatter_fn_p
(loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
gs_info.memory_type, TREE_TYPE (gs_info.offset),
- gs_info.scale, &gs_info.ifn,
+ gs_info.scale, supported_scale, &gs_info.ifn,
&tmp, supported_offset_vectype, elsvals));
SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
inside_cost
+= record_stmt_cost (cost_vec, 1, vector_stmt,
slp_node, 0, vect_body);
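+ /* Account for the extra multiplication of the offsets when the
+    requested scale is emulated with a smaller supported one.  */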
+ if (ls.supported_scale)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
- /* Perform the offset conversion if necessary. */
- if (!strided && ls.supported_offset_vectype)
+ /* Perform the offset conversion and scaling if necessary. */
+ if (!strided
+ && (ls.supported_offset_vectype || ls.supported_scale))
{
gimple_seq stmts = NULL;
- vec_offset = gimple_convert
- (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_offset_vectype)
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_scale)
+ {
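+ /* The requested scale is not supported directly; multiply the
+    offsets by the ratio of the two scales and perform the access
+    with the smaller supported scale instead.  */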
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ scale = size_int (ls.supported_scale);
+ }
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}
inside_cost
+= record_stmt_cost (cost_vec, 1, vector_stmt,
slp_node, 0, vect_body);
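+ /* Account for the extra multiplication of the offsets when the
+    requested scale is emulated with a smaller supported one.  */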
+ if (ls.supported_scale)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
- /* Perform the offset conversion if necessary. */
- if (!strided && ls.supported_offset_vectype)
+ /* Perform the offset conversion and scaling if necessary. */
+ if (!strided
+ && (ls.supported_offset_vectype || ls.supported_scale))
{
gimple_seq stmts = NULL;
- vec_offset = gimple_convert
- (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_offset_vectype)
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_scale)
+ {
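+ /* The requested scale is not supported directly; multiply the
+    offsets by the ratio of the two scales and perform the access
+    with the smaller supported scale instead.  */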
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ scale = size_int (ls.supported_scale);
+ }
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}