static const int arg1_arg4_map[] = { 2, 1, 4 };
static const int arg3_arg2_map[] = { 2, 3, 2 };
static const int op1_op0_map[] = { 2, 1, 0 };
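+/* Operand maps for IFN_MASK_CALL with two to seven gimple arguments:
+   element 0 of each row is the number of child nodes and the rest map
+   the children to arguments 1 onwards, skipping the called function's
+   address at argument 0.  */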
+static const int mask_call_maps[6][7] = {
+ { 1, 1, },
+ { 2, 1, 2, },
+ { 3, 1, 2, 3, },
+ { 4, 1, 2, 3, 4, },
+ { 5, 1, 2, 3, 4, 5, },
+ { 6, 1, 2, 3, 4, 5, 6 },
+};
/* For most SLP statements, there is a one-to-one mapping between
   gimple arguments and child nodes.  If that is not true for STMT,
   return an array that maps child nodes to gimple arguments: element 0
   holds the number of child nodes and the following elements give the
   gimple argument index for each child.  Return null otherwise.  */
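/* For instance, arg3_arg2_map above is { 2, 3, 2 }: an
   IFN_MASK_STORE (pointer, alignment, mask, value) call has two child
   nodes, the stored value (argument 3) followed by the mask
   (argument 2).  */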
case IFN_MASK_STORE:
return arg3_arg2_map;
+ case IFN_MASK_CALL:
+ {
+ unsigned nargs = gimple_call_num_args (call);
+ if (nargs >= 2 && nargs <= 7)
+ return mask_call_maps[nargs-2];
+ else
+ return nullptr;
+ }
+
default:
break;
}
if (call_stmt)
{
combined_fn cfn = gimple_call_combined_fn (call_stmt);
- if (cfn != CFN_LAST)
+ if (cfn != CFN_LAST && cfn != CFN_MASK_CALL)
rhs_code = cfn;
else
rhs_code = CALL_EXPR;
      if (cfn == CFN_MASK_STORE)
        {
          ldst_p = true;
          rhs_code = CFN_MASK_STORE;
}
else if ((cfn != CFN_LAST
+ && cfn != CFN_MASK_CALL
&& internal_fn_p (cfn)
&& !vectorizable_internal_fn_p (as_internal_fn (cfn)))
|| gimple_call_tail_p (call_stmt)
if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
return false;
- /* FORNOW */
- if (slp_node)
- return false;
-
/* Process function arguments. */
nargs = gimple_call_num_args (stmt) - arg_offset;
  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;
arginfo.reserve (nargs, true);
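+  /* With SLP, record the SLP child node of each call argument so that
+     the vector types of invariant operands can be validated at the end
+     of analysis (see the vect_maybe_update_slp_op_vectype loop below).  */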
+ auto_vec<slp_tree> slp_op;
+ slp_op.safe_grow_cleared (nargs);
for (i = 0; i < nargs; i++)
{
thisarginfo.op = NULL_TREE;
thisarginfo.simd_lane_linear = false;
- op = gimple_call_arg (stmt, i + arg_offset);
- if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
- &thisarginfo.vectype)
+ int op_no = i + arg_offset;
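+      /* The SLP child order need not match the gimple argument order,
+         e.g. IFN_MASK_CALL has no child node for the callee address,
+         so translate the argument number into a child index.  */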
+ if (slp_node)
+ op_no = vect_slp_child_index_for_operand (stmt, op_no);
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ op_no, &op, &slp_op[i],
+ &thisarginfo.dt, &thisarginfo.vectype)
|| thisarginfo.dt == vect_uninitialized_def)
{
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
if (thisarginfo.dt == vect_constant_def
|| thisarginfo.dt == vect_external_def)
- gcc_assert (thisarginfo.vectype == NULL_TREE);
+ {
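+          /* During analysis invariants have no vector type yet; compute
+             one here so it can be checked against the SLP node via
+             vect_maybe_update_slp_op_vectype once analysis succeeds.  */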
+ gcc_assert (vec_stmt || thisarginfo.vectype == NULL_TREE);
+ if (!vec_stmt)
+ thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE (op),
+ slp_node);
+ }
else
gcc_assert (thisarginfo.vectype != NULL_TREE);
&& thisarginfo.dt != vect_constant_def
&& thisarginfo.dt != vect_external_def
&& loop_vinfo
- && !slp_node
&& TREE_CODE (op) == SSA_NAME)
vect_simd_lane_linear (op, loop, &thisarginfo);
arginfo.quick_push (thisarginfo);
}
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- if (!vf.is_constant ())
+ if (loop_vinfo
+ && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
{
if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not considering SIMD clones; not yet supported"
                         " for variable-width vectors.\n");
return false;
}
+ poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
+ unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
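+  /* E.g. with vf == 4 and a two-lane SLP group, each vector iteration
+     covers 4 * 2 = 8 scalar calls, so a clone with simdlen 8 yields
+     num_calls == 1 below while simdlen 4 yields num_calls == 2.  */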
unsigned int badness = 0;
struct cgraph_node *bestn = NULL;
if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
{
unsigned int this_badness = 0;
unsigned int num_calls;
- if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
+ if (!constant_multiple_p (vf * group_size,
+ n->simdclone->simdlen, &num_calls)
|| n->simdclone->nargs != nargs)
continue;
if (num_calls != 1)
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
- ncopies = vector_unroll_factor (vf, nunits);
+ if (slp_node)
+ ncopies = vector_unroll_factor (vf * group_size, nunits);
+ else
+ ncopies = vector_unroll_factor (vf, nunits);
/* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
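  /* For example, a clone emitted for

        #pragma omp declare simd
        extern int foo (int);

     where foo is not declared 'const' is only usable inside
     '#pragma omp simd' loops, which carry exactly this assertion.  */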
if (!vec_stmt) /* transformation not required. */
{
+ if (slp_node)
+ for (unsigned i = 0; i < nargs; ++i)
+ if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
/* When the original call is pure or const but the SIMD ABI dictates
an aggregate return we will have to use a virtual definition and
     in a loop eventually even need to add a virtual PHI.  That's
     not straight-forward so allow to fix this up via renaming.  */
  if (gimple_call_lhs (stmt)
&& !gimple_vdef (stmt)
&& TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
vinfo->any_known_not_updated_vssa = true;
+ /* ??? For SLP code-gen we end up inserting after the last
+ vector argument def rather than at the original call position
+ so automagic virtual operand updating doesn't work. */
+ if (gimple_vuse (stmt) && slp_node)
+ vinfo->any_known_not_updated_vssa = true;
STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
for (i = 0; i < nargs; i++)
if ((bestn->simdclone->args[i].arg_type
auto_vec<vec<tree> > vec_oprnds;
auto_vec<unsigned> vec_oprnds_i;
- vec_oprnds.safe_grow_cleared (nargs, true);
vec_oprnds_i.safe_grow_cleared (nargs, true);
+ if (slp_node)
+ {
+ vec_oprnds.reserve_exact (nargs);
+ vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
+ }
+ else
+ vec_oprnds.safe_grow_cleared (nargs, true);
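+  /* For SLP, all vector defs for every operand were created above by
+     vect_get_slp_defs, so the vect_get_vec_defs_for_operand calls in
+     the copy loop below are skipped and the defs are read directly
+     from vec_oprnds.  */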
for (j = 0; j < ncopies; ++j)
{
/* Build argument list for the vectorized call. */
gcc_assert ((k & (k - 1)) == 0);
if (m == 0)
{
- vect_get_vec_defs_for_operand (vinfo, stmt_info,
- ncopies * o / k, op,
- &vec_oprnds[i]);
+ if (!slp_node)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ ncopies * o / k, op,
+ &vec_oprnds[i]);
vec_oprnds_i[i] = 0;
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
}
{
if (m == 0 && l == 0)
{
- vect_get_vec_defs_for_operand (vinfo, stmt_info,
- k * o * ncopies,
- op,
- &vec_oprnds[i]);
+ if (!slp_node)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ k * o * ncopies,
+ op,
+ &vec_oprnds[i]);
vec_oprnds_i[i] = 0;
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
}
elements as the current function. */
if (m == 0)
{
- vect_get_vec_defs_for_operand (vinfo, stmt_info,
- o * ncopies,
- op,
- &vec_oprnds[i]);
+ if (!slp_node)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ o * ncopies,
+ op,
+ &vec_oprnds[i]);
vec_oprnds_i[i] = 0;
}
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
if (j == 0 && l == 0)
*vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
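+      /* SLP consumers look up vector defs in SLP_TREE_VEC_DEFS, so
+         record the new SSA name rather than the statement.  */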
+ if (slp_node)
+ SLP_TREE_VEC_DEFS (slp_node)
+ .quick_push (gimple_assign_lhs (new_stmt));
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
if (ratype)
if ((unsigned) j == k - 1)
*vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ if (slp_node)
+ SLP_TREE_VEC_DEFS (slp_node)
+ .quick_push (gimple_assign_lhs (new_stmt));
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
continue;
}
else if (ratype)
if (j == 0)
*vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ if (slp_node)
+ SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
for (i = 0; i < nargs; ++i)