}
}
+/* Return whether the load permutation of NODE is consecutive starting
+   from index START_IDX.
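+   For example, { 2, 3, 4, 5 } is consecutive starting from index 0,
+   while { 0, 2, 3, 4 } is consecutive only starting from index 1.  */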
+
+bool
+vect_load_perm_consecutive_p (slp_tree node, unsigned start_idx)
+{
+ load_permutation_t perm = SLP_TREE_LOAD_PERMUTATION (node);
+
+  if (!perm.exists () || perm.length () <= start_idx)
+ return false;
+
+ unsigned int start = perm[start_idx];
+ for (unsigned int i = start_idx + 1; i < perm.length (); i++)
+    if (perm[i] != start + (i - start_idx))
+ return false;
+
+ return true;
+}
+
/* Process the set of LOADS that are all from the same dataref group. */
static void
ld_lanes_lanes = 0;
break;
}
- for (unsigned i = 1; i < SLP_TREE_LANES (load); ++i)
- if (SLP_TREE_LOAD_PERMUTATION (load)[i] != first + i)
- {
- ld_lanes_lanes = 0;
- break;
- }
+ if (!vect_load_perm_consecutive_p (load))
+ {
+ ld_lanes_lanes = 0;
+ break;
+ }
}
/* Only a power-of-two number of lanes matches interleaving with N levels.
continue;
/* Build the permute to get the original load permutation order. */
- bool contiguous = true;
+ bool contiguous = vect_load_perm_consecutive_p (load);
lane_permutation_t final_perm;
final_perm.create (SLP_TREE_LANES (load));
for (unsigned i = 0; i < SLP_TREE_LANES (load); ++i)
- {
- final_perm.quick_push
- (std::make_pair (0, SLP_TREE_LOAD_PERMUTATION (load)[i]));
- if (i != 0
- && (SLP_TREE_LOAD_PERMUTATION (load)[i]
- != SLP_TREE_LOAD_PERMUTATION (load)[i-1] + 1))
- contiguous = false;
- }
+      final_perm.quick_push
+	(std::make_pair (0, SLP_TREE_LOAD_PERMUTATION (load)[i]));
/* When the load permutation accesses a contiguous unpermuted,
power-of-two aligned and sized chunk leave the load alone.
else
{
loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
- stmt_vec_info load_info;
- bool this_load_permuted = false;
- unsigned j;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
- if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
- {
- this_load_permuted = true;
- break;
- }
+ bool this_load_permuted = !vect_load_perm_consecutive_p (node);
/* When this isn't a grouped access we know it's single element
and contiguous. */
if (!STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (node)[0]))
return NULL_TREE;
}
+/* Check whether the load permutation of NODE only refers to a consecutive
+   subset of the group indices, where GROUP_SIZE is the size of the
+   dataref's group.  We also require that the length of the permutation
+   is a power of two and divides the group size.
+   Such load permutations can be elided in strided access schemes as
+   we can "jump over" the gap they leave.
+
+static bool
+has_consecutive_load_permutation (slp_tree node, unsigned group_size)
+{
+ load_permutation_t perm = SLP_TREE_LOAD_PERMUTATION (node);
+ if (!perm.exists ()
+ || perm.length () <= 1
+ || !pow2p_hwi (perm.length ())
+ || group_size % perm.length ())
+ return false;
+
+ return vect_load_perm_consecutive_p (node);
+}
+
/* Analyze load or store SLP_NODE of type VLS_TYPE. Return true
if there is a memory access type that the vectorized form can use,
storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
*ls_type = NULL_TREE;
*slp_perm = false;
*n_perms = -1U;
+ ls->subchain_p = false;
bool perm_ok = true;
poly_int64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
first_dr_info = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
if (STMT_VINFO_STRIDED_P (first_stmt_info))
- /* Try to use consecutive accesses of as many elements as possible,
- separated by the stride, until we have a complete vector.
- Fall back to scalar accesses if that isn't possible. */
- *memory_access_type = VMAT_STRIDED_SLP;
+ {
+ /* Try to use consecutive accesses of as many elements as possible,
+ separated by the stride, until we have a complete vector.
+ Fall back to scalar accesses if that isn't possible. */
+ *memory_access_type = VMAT_STRIDED_SLP;
+
+      /* If the load permutation is consecutive we can reduce the group
+	 to the elements the permutation actually accesses and release
+	 the permutation.
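+	 For example, a group of size 8 loaded as { 4, 5, 6, 7 } becomes
+	 a group of four elements starting at the fifth one.  */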
+ if (has_consecutive_load_permutation (slp_node, group_size))
+ {
+ ls->subchain_p = true;
+ group_size = SLP_TREE_LANES (slp_node);
+ SLP_TREE_LOAD_PERMUTATION (slp_node).release ();
+ }
+ }
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
vect_memory_access_type grouped_gather_fallback = VMAT_UNINITIALIZED;
if (loop_vinfo
&& (*memory_access_type == VMAT_ELEMENTWISE
- || *memory_access_type == VMAT_STRIDED_SLP)
- && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ || *memory_access_type == VMAT_STRIDED_SLP))
{
gather_scatter_info gs_info;
if (SLP_TREE_LANES (slp_node) == 1
if (grouped_load)
{
- first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+	  /* If we elided a consecutive load permutation, don't use the
+	     group's original first statement (this node might not access
+	     it anymore) but the first scalar statement of the node, i.e.
+	     the one the elided permutation started with.  This ensures
+	     the stride_base below is correct.
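+	     For example, if the group starts at a[0] but this node only
+	     loads a[4] to a[7], stride_base must be based on a[4]'s
+	     address rather than a[0]'s.  */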
+ if (!ls.subchain_p)
+ first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ else
+ first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
ref_type = get_group_alias_ptr_type (first_stmt_info);
}
if (grouped_load)
{
if (memory_access_type == VMAT_STRIDED_SLP)
- group_size = DR_GROUP_SIZE (first_stmt_info);
+ {
+	      /* If we elided a consecutive load permutation, adjust
+		 the group size here.  */
+ if (!ls.subchain_p)
+ group_size = DR_GROUP_SIZE (first_stmt_info);
+ else
+ group_size = SLP_TREE_LANES (slp_node);
+ }
else /* VMAT_ELEMENTWISE */
group_size = SLP_TREE_LANES (slp_node);
}
tree decl; // VMAT_GATHER_SCATTER_DECL
} gs;
tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
+  /* Load/store type with a larger element mode used for punning the
+     vectype.  */
tree ls_type; // VMAT_GATHER_SCATTER_IFN
/* This is set to a supported offset vector type if we don't support the
originally requested offset type, otherwise NULL.
/* True if the load requires a load permutation. */
bool slp_perm; // SLP_TREE_LOAD_PERMUTATION
unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
+  /* Whether the load accesses a consecutive subchain of the group,
+     i.e. a consecutive load permutation was elided.  */
+  bool subchain_p; // VMAT_STRIDED_SLP and VMAT_GATHER_SCATTER
};
/* A computation tree of an SLP instance. Each node corresponds to a group of
extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
gimple_stmt_iterator *);
extern tree vect_get_mask_load_else (int, tree);
+extern bool vect_load_perm_consecutive_p (slp_tree, unsigned = 0);
/* In tree-vect-patterns.cc. */
extern void