first_dr_info
= STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
if (STMT_VINFO_STRIDED_P (first_stmt_info))
- {
- /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
- separated by the stride, until we have a complete vector.
- Fall back to scalar accesses if that isn't possible. */
- if (multiple_p (nunits, group_size))
- *memory_access_type = VMAT_STRIDED_SLP;
- else
- *memory_access_type = VMAT_ELEMENTWISE;
- }
+ /* Try to use consecutive accesses of as many elements as possible,
+ separated by the stride, until we have a complete vector.
+ Fall back to scalar accesses if that isn't possible. */
+ *memory_access_type = VMAT_STRIDED_SLP;
else
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
tree lvectype = vectype;
if (slp)
{
- if (group_size < const_nunits
- && const_nunits % group_size == 0)
+ HOST_WIDE_INT n = gcd (group_size, const_nunits);
+ if (n == const_nunits)
{
- nstores = const_nunits / group_size;
- lnel = group_size;
- ltype = build_vector_type (elem_type, group_size);
+ int mis_align = dr_misalignment (first_dr_info, vectype);
+ dr_alignment_support dr_align
+ = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
+ mis_align);
+ if (dr_align == dr_aligned
+ || dr_align == dr_unaligned_supported)
+ {
+ nstores = 1;
+ lnel = const_nunits;
+ ltype = vectype;
+ lvectype = vectype;
+ alignment_support_scheme = dr_align;
+ misalignment = mis_align;
+ }
+ }
+ else if (n > 1)
+ {
+ nstores = const_nunits / n;
+ lnel = n;
+ ltype = build_vector_type (elem_type, n);
lvectype = vectype;
/* First check if vec_extract optab doesn't support extraction
machine_mode vmode;
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !related_vector_mode (TYPE_MODE (vectype), elmode,
- group_size).exists (&vmode)
+ n).exists (&vmode)
|| (convert_optab_handler (vec_extract_optab,
TYPE_MODE (vectype), vmode)
== CODE_FOR_nothing))
re-interpreting it as the original vector type if
supported. */
unsigned lsize
- = group_size * GET_MODE_BITSIZE (elmode);
- unsigned int lnunits = const_nunits / group_size;
+ = n * GET_MODE_BITSIZE (elmode);
+ unsigned int lnunits = const_nunits / n;
/* If we can't construct such a vector fall back to
element extracts from the original vector type and
element size stores. */
!= CODE_FOR_nothing))
{
nstores = lnunits;
- lnel = group_size;
+ lnel = n;
ltype = build_nonstandard_integer_type (lsize, 1);
lvectype = build_vector_type (ltype, nstores);
}
issue exists here for reasonable archs. */
}
}
- else if (group_size >= const_nunits
- && group_size % const_nunits == 0)
- {
- int mis_align = dr_misalignment (first_dr_info, vectype);
- dr_alignment_support dr_align
- = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
- mis_align);
- if (dr_align == dr_aligned
- || dr_align == dr_unaligned_supported)
- {
- nstores = 1;
- lnel = const_nunits;
- ltype = vectype;
- lvectype = vectype;
- alignment_support_scheme = dr_align;
- misalignment = mis_align;
- }
- }
ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
auto_vec<tree> dr_chain;
if (memory_access_type == VMAT_STRIDED_SLP)
{
- if (group_size < const_nunits)
+ HOST_WIDE_INT n = gcd (group_size, const_nunits);
+ /* Use the target vector type if the group size is a multiple
+ of its number of elements. */
+ if (n == const_nunits)
+ {
+ nloads = 1;
+ lnel = const_nunits;
+ ltype = vectype;
+ }
+ /* Else use the biggest vector with which we can load the group
+ without accessing excess elements. */
+ else if (n > 1)
{
- /* First check if vec_init optab supports construction from vector
- elts directly. Otherwise avoid emitting a constructor of
- vector elements by performing the loads using an integer type
- of the same size, constructing a vector of those and then
- re-interpreting it as the original vector type. This avoids a
- huge runtime penalty due to the general inability to perform
- store forwarding from smaller stores to a larger load. */
tree ptype;
tree vtype
- = vector_vector_composition_type (vectype,
- const_nunits / group_size,
+ = vector_vector_composition_type (vectype, const_nunits / n,
&ptype);
if (vtype != NULL_TREE)
{
- nloads = const_nunits / group_size;
- lnel = group_size;
+ nloads = const_nunits / n;
+ lnel = n;
lvectype = vtype;
ltype = ptype;
}
}
- else
- {
- nloads = 1;
- lnel = const_nunits;
- ltype = vectype;
- }
+ /* Else fall back to the default element-wise access. */
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
}
/* Load vector(1) scalar_type if it's 1 element-wise vectype. */