--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3 -ftree-vectorize -fdump-tree-slp1-details" } */
+
+typedef short i16;
+
+void
+foo (i16 *__restrict out, i16 *__restrict in, i16 x, unsigned n)
+{
+ unsigned k = 0;
+ i16 tmp = x + 3;
+
+ while (k < n)
+ {
+ tmp = tmp ^ 0x3f;
+
+ out[k + 0] = in[k + 0] + tmp;
+ out[k + 1] = in[k + 1] + tmp;
+ out[k + 2] = in[k + 2] + tmp;
+ out[k + 3] = in[k + 3] + tmp;
+ out[k + 4] = in[k + 4] + tmp;
+ out[k + 5] = in[k + 5] + tmp;
+ out[k + 6] = in[k + 6] + tmp;
+ out[k + 7] = in[k + 7] + tmp;
+ out[k + 8] = in[k + 8] + tmp;
+ out[k + 9] = in[k + 9] + tmp;
+ out[k + 10] = in[k + 10] + tmp;
+ out[k + 11] = in[k + 11] + tmp;
+ out[k + 12] = in[k + 12] + tmp;
+ out[k + 13] = in[k + 13] + tmp;
+ out[k + 14] = in[k + 14] + tmp;
+ out[k + 15] = in[k + 15] + tmp;
+
+ k += 16;
+ }
+}
+
+/* { dg-final { scan-tree-dump "Re-trying with swapped operands of stmts" "slp1" } } */
+/* { dg-final { scan-tree-dump "Using a splat of the uniform operand" "slp1" } } */
+/* { dg-final { scan-tree-dump "optimized: basic block part vectorized" "slp1" } } */
}
}
+/* Distance from the node currently being discovered to the closest upthread
+ commutative operation whose operand-zero discovery may still be fixed by
+ retrying with swapped operands, or -1U if there is none. */
+
+static unsigned least_upthread_swappable_op_distance = -1U;
+
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
{
slp_tree child = nullptr;
unsigned int j;
+ unsigned old_swap_distance;
+ bool can_swap;
+ bool can_swap_nonmatching;
+ bool *stmt_can_swap;
/* We're skipping certain operands from processing, for example
outer loop reduction initial defs. */
def_stmts2.release ();
}
- if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- group_size, &this_max_nunits,
- matches, limit,
- &this_tree_size, bst_map)) != NULL)
+ can_swap = (i == 0
+ && (nops == 2 || nops == 3)
+ && oprnds_info.length () > 1
+ && oprnds_info[1]->first_dt == vect_internal_def
+ && (is_gimple_assign (stmt_info->stmt)
+ || is_gimple_call (stmt_info->stmt))
+ /* Swapping operands for reductions breaks assumptions
+ later on. */
+ && STMT_VINFO_REDUC_IDX (stmt_info) == -1);
+ can_swap_nonmatching = can_swap;
+ stmt_can_swap = NULL;
+ if (can_swap)
+ {
+ stmt_can_swap = XALLOCAVEC (bool, group_size);
+ for (j = 0; j < group_size; ++j)
+ {
+ stmt_can_swap[j] = false;
+ if (!stmts[j])
+ /* NULL lanes are gaps and have no stmt to swap. */
+ stmt_can_swap[j] = true;
+ else if (gassign *stmt = dyn_cast <gassign *> (stmts[j]->stmt))
+ {
+ tree_code code = gimple_assign_rhs_code (stmt);
+ stmt_can_swap[j] = (commutative_tree_code (code)
+ || commutative_ternary_tree_code (code));
+ }
+ else if (gcall *call = dyn_cast <gcall *> (stmts[j]->stmt))
+ {
+ internal_fn fn = (gimple_call_internal_p (call)
+ ? gimple_call_internal_fn (call) : IFN_LAST);
+ stmt_can_swap[j] = ((commutative_binary_fn_p (fn)
+ || commutative_ternary_fn_p (fn))
+ && first_commutative_argument (fn) == 0);
+ }
+
+ if (j != 0 && !stmt_can_swap[j])
+ can_swap_nonmatching = false;
+ }
+ }
+
+ old_swap_distance = least_upthread_swappable_op_distance;
+ if (can_swap_nonmatching)
+ least_upthread_swappable_op_distance = 1;
+ else if (least_upthread_swappable_op_distance != -1U)
+ least_upthread_swappable_op_distance++;
+ child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
+ group_size, &this_max_nunits,
+ matches, limit,
+ &this_tree_size, bst_map);
+ least_upthread_swappable_op_distance = old_swap_distance;
+ if (child != NULL)
{
oprnd_info->def_stmts = vNULL;
children.safe_push (child);
/* If the SLP build for operand zero failed and operand zero
and one can be commuted try that for the scalar stmts
that failed the match. */
- if (i == 0
- /* A first scalar stmt mismatch signals a fatal mismatch. */
- && matches[0]
- /* ??? For COND_EXPRs we can swap the comparison operands
- as well as the arms under some constraints. */
- && (nops == 2 || nops == 3)
- && oprnds_info[1]->first_dt == vect_internal_def
- && (is_gimple_assign (stmt_info->stmt)
- || is_gimple_call (stmt_info->stmt))
- /* Swapping operands for reductions breaks assumptions later on. */
- && STMT_VINFO_REDUC_IDX (stmt_info) == -1)
+ if (/* A first scalar stmt mismatch signals a fatal mismatch. */
+ matches[0]
+ && can_swap)
{
/* See whether we can swap the matching or the non-matching
stmt operands. */
{
if (matches[j] != !swap_not_matching)
continue;
- stmt_vec_info stmt_info = stmts[j];
/* Verify if we can swap operands of this stmt. */
- if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
- {
- tree_code code = gimple_assign_rhs_code (stmt);
- if (! commutative_tree_code (code)
- && ! commutative_ternary_tree_code (code))
- {
- if (!swap_not_matching)
- goto fail;
- swap_not_matching = false;
- break;
- }
- }
- else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+ if (!stmt_can_swap[j])
{
- internal_fn fn = (gimple_call_internal_p (call)
- ? gimple_call_internal_fn (call)
- : IFN_LAST);
- if ((! commutative_binary_fn_p (fn)
- && ! commutative_ternary_fn_p (fn))
- || first_commutative_argument (fn) != 0)
- {
- if (!swap_not_matching)
- goto fail;
- swap_not_matching = false;
- break;
- }
+ if (!swap_not_matching)
+ goto fail;
+ swap_not_matching = false;
+ break;
}
}
}
/* ??? Rejecting patterns this way doesn't work. We'd have to
do extra work to cancel the pattern so the uses see the
scalar version. */
+ /* Skip building vector operands from scalars while operand
+ discovery may still be fixed by retrying with swapped operands. */
+ && (least_upthread_swappable_op_distance != 1
+ /* A first scalar stmt mismatch signals a fatal mismatch
+ that the parent commutative retry cannot recover. */
+ || !matches[0])
&& !is_pattern_stmt_p (stmt_info)
&& !oprnd_info->any_pattern)
{