--- /dev/null
+#include "tree-vect.h"
+
+#define N 16
+
+/* Two-lane guarded gather kernel.  For each I in [0, N) this writes the
+   pair Y[I * 2] and Y[I * 2 + 1].  An element whose entry in INDICES is
+   less than N * 2 becomes X[that entry] plus a per-lane constant (1 for
+   the even lane, 2 for the odd lane); otherwise it becomes the constant
+   alone and X is not read for that element.
+
+   The comparison is signed, so a negative entry would pass the guard;
+   the data used by this test contains none.  */
+void __attribute__((noipa))
+f (int *restrict y, int *restrict x, int *restrict indices)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ /* Even lane: guarded load from X, plus 1.  */
+ y[i * 2] = (indices[i * 2] < N * 2
+ ? x[indices[i * 2]] + 1
+ : 1);
+ /* Odd lane: same shape, plus 2.  */
+ y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
+ ? x[indices[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* Output buffer that main passes to f as Y.  */
+int y[N * 2];
+/* Source values that f loads from when the guard on the index passes.  */
+int x[N * 2] = {
+ 72704, 52152, 51301, 96681,
+ 57937, 60490, 34504, 60944,
+ 42225, 28333, 88336, 74300,
+ 29250, 20484, 38852, 91536,
+ 86917, 63941, 31590, 21998,
+ 22419, 26974, 28668, 13968,
+ 3451, 20247, 44089, 85521,
+ 22871, 87362, 50555, 85939
+};
+/* Index values handed to f.  Entries that are N * 2 (32) or larger, such
+   as 0x10000, 0xcafe0 and 32 itself, fail the "< N * 2" guard in f, so
+   the corresponding output is just the per-lane constant and X is not
+   read for them.  */
+int indices[N * 2] = {
+ 15, 0x10000, 0xcafe0, 19,
+ 7, 22, 19, 1,
+ 0x20000, 0x70000, 15, 30,
+ 5, 12, 11, 11,
+ 10, 25, 5, 20,
+ 22, 24, 32, 28,
+ 30, 19, 6, 0xabcdef,
+ 7, 12, 8, 21
+};
+/* Values main compares Y against after calling f: for element K this is
+   x[indices[K]] + 1 (even K) or + 2 (odd K) when indices[K] < N * 2,
+   and otherwise plain 1 or 2.  */
+int expected[N * 2] = {
+ 91537, 2, 1, 22000,
+ 60945, 28670, 21999, 52154,
+ 1, 2, 91537, 50557,
+ 60491, 29252, 74301, 74302,
+ 88337, 20249, 60491, 22421,
+ 28669, 3453, 1, 22873,
+ 50556, 22000, 34505, 2,
+ 60945, 29252, 42226, 26976
+};
+
+/* Test driver: calls check_vect (declared in tree-vect.h), runs f on the
+   fixed inputs above and aborts on the first element of y that differs
+   from the precomputed table.  Returns 0 when every element matches.  */
+int
+main (void)
+{
+ check_vect ();
+
+ f (y, x, indices);
+ /* Bound the comparison by the array size (N * 2) instead of a literal
+    32 so the check cannot run past, or stop short of, the arrays if N
+    is ever changed.  */
+ for (int i = 0; i < N * 2; ++i)
+ if (y[i] != expected[i])
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */
--- /dev/null
+/* { dg-do compile } */
+
+#define N 16
+
+/* Guarded two-lane gather in which the lanes use different base arrays:
+   the even element loads from X1 and the odd element loads from X2.
+   The scan-tree-dump-not directive at the end of this test expects the
+   vect dump not to report the loop as containing only SLP statements;
+   presumably the differing bases are what rules that out here (confirm
+   against the vectorizer).  */
+void
+f1 (int *restrict y, int *restrict x1, int *restrict x2,
+ int *restrict indices)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ y[i * 2] = (indices[i * 2] < N * 2
+ ? x1[indices[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
+ ? x2[indices[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* Guarded two-lane gather in which the lanes compute their offsets
+   differently: the even element uses the index as-is, while the odd
+   element multiplies it by 2 before subscripting X.  Presumably this
+   mismatch in scaling is the reason the test expects the loop not to be
+   SLP-only (confirm against the vectorizer).  */
+void
+f2 (int *restrict y, int *restrict x, int *restrict indices)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ y[i * 2] = (indices[i * 2] < N * 2
+ ? x[indices[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
+ ? x[indices[i * 2 + 1] * 2] + 2
+ : 2);
+ }
+}
+
+/* Guarded two-lane gather in which the lanes differ in index signedness:
+   the even element subscripts X with the signed index, while the odd
+   element casts the index to unsigned int first.  Presumably this
+   difference in extension is the reason the test expects the loop not
+   to be SLP-only (confirm against the vectorizer).  */
+void
+f3 (int *restrict y, int *restrict x, int *restrict indices)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ y[i * 2] = (indices[i * 2] < N * 2
+ ? x[indices[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
+ ? x[(unsigned int) indices[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-vect-cost-model" } */
+
+#include <stdint.h>
+
+/* Guarded two-lane gather over 100 iterations with int32_t data and a
+   signed 32-bit (int32_t) index.  An element is X[index] plus 1 (even
+   lane) or 2 (odd lane) when the index is below 128, otherwise just the
+   constant.  Presumably this is the function targeted by the "sxtw"
+   ld1w scan-assembler directive at the end of this test (confirm).  */
+void
+f1 (int32_t *restrict y, int32_t *restrict x, int32_t *restrict index)
+{
+ for (int i = 0; i < 100; ++i)
+ {
+ y[i * 2] = (index[i * 2] < 128
+ ? x[index[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (index[i * 2 + 1] < 128
+ ? x[index[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* Same guarded two-lane gather with int32_t data, but with an unsigned
+   32-bit (uint32_t) index, so the "< 128" guard is an unsigned
+   comparison.  Presumably this is the function targeted by the "uxtw"
+   ld1w scan-assembler directive at the end of this test (confirm).  */
+void
+f2 (int32_t *restrict y, int32_t *restrict x, uint32_t *restrict index)
+{
+ for (int i = 0; i < 100; ++i)
+ {
+ y[i * 2] = (index[i * 2] < 128
+ ? x[index[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (index[i * 2 + 1] < 128
+ ? x[index[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* Same guarded two-lane gather with int32_t data, but with a 64-bit
+   unsigned (uint64_t) index, i.e. the index is wider than the data.
+   Presumably this is the function targeted by the ld1w ".d, lsl 2"
+   scan-assembler directive at the end of this test (confirm).  */
+void
+f3 (int32_t *restrict y, int32_t *restrict x, uint64_t *restrict index)
+{
+ for (int i = 0; i < 100; ++i)
+ {
+ y[i * 2] = (index[i * 2] < 128
+ ? x[index[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (index[i * 2 + 1] < 128
+ ? x[index[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* Same guarded two-lane gather with both the data (int64_t) and the
+   index (uint64_t) 64 bits wide.  Presumably this is the function
+   targeted by the ld1d ".d, lsl 3" scan-assembler directive at the end
+   of this test (confirm).  */
+void
+f4 (int64_t *restrict y, int64_t *restrict x, uint64_t *restrict index)
+{
+ for (int i = 0; i < 100; ++i)
+ {
+ y[i * 2] = (index[i * 2] < 128
+ ? x[index[i * 2]] + 1
+ : 1);
+ y[i * 2 + 1] = (index[i * 2 + 1] < 128
+ ? x[index[i * 2 + 1]] + 2
+ : 2);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw #?2\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw #?2\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?2\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?3\]} 1 } } */
};
static const int arg1_map[] = { 1, 1 };
static const int arg2_map[] = { 1, 2 };
+static const int arg1_arg4_map[] = { 2, 1, 4 };
/* For most SLP statements, there is a one-to-one mapping between
gimple arguments and child nodes. If that is not true for STMT,
case IFN_GATHER_LOAD:
return arg1_map;
+ case IFN_MASK_GATHER_LOAD:
+ return arg1_arg4_map;
+
default:
break;
}
else
rhs_code = CALL_EXPR;
- if (cfn == CFN_MASK_LOAD || cfn == CFN_GATHER_LOAD)
+ if (cfn == CFN_MASK_LOAD
+ || cfn == CFN_GATHER_LOAD
+ || cfn == CFN_MASK_GATHER_LOAD)
load_p = true;
else if ((internal_fn_p (cfn)
&& !vectorizable_internal_fn_p (as_internal_fn (cfn)))
} /* Grouped access. */
else
{
- if (load_p && rhs_code != CFN_GATHER_LOAD)
+ if (load_p
+ && rhs_code != CFN_GATHER_LOAD
+ && rhs_code != CFN_MASK_GATHER_LOAD)
{
/* Not grouped load. */
if (dump_enabled_p ())
{
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
- || gimple_call_internal_p (stmt, IFN_GATHER_LOAD));
+ || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+ || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
else
{
*max_nunits = this_max_nunits;
return false;
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+ int mask_index = -1;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
scalar_dest = gimple_assign_lhs (assign);
if (!scalar_dest)
return false;
- int mask_index = internal_fn_mask_index (ifn);
+ mask_index = internal_fn_mask_index (ifn);
+ /* ??? For SLP the mask operand is always last. */
+ if (mask_index >= 0 && slp_node)
+ mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
if (mask_index >= 0
- && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
- /* ??? For SLP we only have operands for
- the mask operand. */
- slp_node ? 0 : mask_index,
+ && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
&mask, NULL, &mask_dt, &mask_vectype))
return false;
}
vec<tree> vec_offsets = vNULL;
auto_vec<tree> vec_masks;
if (mask)
- vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
- mask, &vec_masks, mask_vectype, NULL_TREE);
+ {
+ if (slp_node)
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
+ &vec_masks);
+ else
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
+ &vec_masks, mask_vectype);
+ }
tree vec_mask = NULL_TREE;
poly_uint64 group_elt = 0;
for (j = 0; j < ncopies; j++)