}
+enum scan_store_kind {
+ /* Normal permutation: the target supports the constant permutation
+ needed for the step directly. */
+ scan_store_kind_perm,
+
+ /* Whole vector left shift permutation with zero init: the permutation
+ takes a zero vector as its first input. The shift shifts in zeros and
+ init is an all zero constant, so nothing further is needed. */
+ scan_store_kind_lshift_zero,
+
+ /* Whole vector left shift permutation and VEC_COND_EXPR: like the
+ above, but init is not an all zero constant, so the shifted in zero
+ elements have to be replaced afterwards. */
+ scan_store_kind_lshift_cond
+};
+
/* Function check_scan_store.
Verify if we can perform the needed permutations or whole vector shifts.
- Return -1 on failure, otherwise exact log2 of vectype's nunits. */
+ Return -1 on failure, otherwise exact log2 of vectype's nunits.
+ If USE_WHOLE_VECTOR is non-NULL, the enum scan_store_kind operation
+ to do at each step is pushed to it; nothing is pushed when every
+ step can use a normal permutation. */
static int
-scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
+scan_store_can_perm_p (tree vectype, tree init,
+ vec<enum scan_store_kind> *use_whole_vector = NULL)
{
enum machine_mode vec_mode = TYPE_MODE (vectype);
unsigned HOST_WIDE_INT nunits;
return -1;
int i;
+ enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
for (i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
+ enum scan_store_kind kind = scan_store_kind_perm;
vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits);
- if (i == 0)
+ if (i == units_log2)
{
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
}
else
{
- for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j)
+ for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
- vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits);
+ vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (!can_vec_perm_const_p (vec_mode, indices))
- break;
- }
-
- if (i == 0)
- return -1;
-
- if (i <= units_log2)
- {
- if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
- return -1;
- int kind = 1;
- /* Whole vector shifts shift in zeros, so if init is all zero constant,
- there is no need to do anything further. */
- if ((TREE_CODE (init) != INTEGER_CST
- && TREE_CODE (init) != REAL_CST)
- || !initializer_zerop (init))
{
- tree masktype = build_same_sized_truth_vector_type (vectype);
- if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
+ if (i == units_log2)
return -1;
- kind = 2;
+
+ if (whole_vector_shift_kind == scan_store_kind_perm)
+ {
+ if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
+ return -1;
+ whole_vector_shift_kind = scan_store_kind_lshift_zero;
+ /* Whole vector shifts shift in zeros, so if init is all zero
+ constant, there is no need to do anything further. */
+ if ((TREE_CODE (init) != INTEGER_CST
+ && TREE_CODE (init) != REAL_CST)
+ || !initializer_zerop (init))
+ {
+ tree masktype = build_same_sized_truth_vector_type (vectype);
+ if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
+ return -1;
+ whole_vector_shift_kind = scan_store_kind_lshift_cond;
+ }
+ }
+ kind = whole_vector_shift_kind;
+ }
+ if (use_whole_vector)
+ {
+ if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
+ use_whole_vector->safe_grow_cleared (i);
+ if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
+ use_whole_vector->safe_push (kind);
}
- if (use_whole_vector_p)
- *use_whole_vector_p = kind;
}
+
return units_log2;
}
unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable ();
- int use_whole_vector_p = 0;
- int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p);
+ auto_vec<enum scan_store_kind, 16> use_whole_vector;
+ int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
gcc_assert (units_log2 > 0);
auto_vec<tree, 16> perms;
perms.quick_grow (units_log2 + 1);
+ tree zero_vec = NULL_TREE, masktype = NULL_TREE;
for (int i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
if (i == units_log2)
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
- else
- {
- for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
- sel[j] = j;
- for (k = 0; j < nunits; ++j, ++k)
- sel[j] = nunits + k;
- }
+ else
+ {
+ for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
+ sel[j] = j;
+ for (k = 0; j < nunits; ++j, ++k)
+ sel[j] = nunits + k;
+ }
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
- if (use_whole_vector_p && i < units_log2)
- perms[i] = vect_gen_perm_mask_any (vectype, indices);
+ if (!use_whole_vector.is_empty ()
+ && use_whole_vector[i] != scan_store_kind_perm)
+ {
+ if (zero_vec == NULL_TREE)
+ zero_vec = build_zero_cst (vectype);
+ if (masktype == NULL_TREE
+ && use_whole_vector[i] == scan_store_kind_lshift_cond)
+ masktype = build_same_sized_truth_vector_type (vectype);
+ perms[i] = vect_gen_perm_mask_any (vectype, indices);
+ }
else
perms[i] = vect_gen_perm_mask_checked (vectype, indices);
}
- tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE;
- tree masktype = (use_whole_vector_p == 2
- ? build_same_sized_truth_vector_type (vectype) : NULL_TREE);
stmt_vec_info prev_stmt_info = NULL;
tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE;
{
tree new_temp = make_ssa_name (vectype);
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
- zero_vec ? zero_vec : vec_oprnd1, v,
+ (zero_vec
+ && (use_whole_vector[i]
+ != scan_store_kind_perm))
+ ? zero_vec : vec_oprnd1, v,
perms[i]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL)
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
- if (use_whole_vector_p == 2)
+ if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
{
/* Whole vector shift shifted in zero bits, but if *init
is not initializer_zerop, we need to replace those elements