0
};
-/* Invoke T(CODE, IFN) for each conditional function IFN that maps to a
- tree code CODE. */
+/* Invoke T(CODE, SUFFIX) for each conditional function IFN_COND_##SUFFIX
+ that maps to a tree code CODE. There is also an IFN_COND_LEN_##SUFFIX
+ for each such IFN_COND_##SUFFIX. */
#define FOR_EACH_CODE_MAPPING(T) \
- T (PLUS_EXPR, IFN_COND_ADD) \
- T (MINUS_EXPR, IFN_COND_SUB) \
- T (MULT_EXPR, IFN_COND_MUL) \
- T (TRUNC_DIV_EXPR, IFN_COND_DIV) \
- T (TRUNC_MOD_EXPR, IFN_COND_MOD) \
- T (RDIV_EXPR, IFN_COND_RDIV) \
- T (MIN_EXPR, IFN_COND_MIN) \
- T (MAX_EXPR, IFN_COND_MAX) \
- T (BIT_AND_EXPR, IFN_COND_AND) \
- T (BIT_IOR_EXPR, IFN_COND_IOR) \
- T (BIT_XOR_EXPR, IFN_COND_XOR) \
- T (LSHIFT_EXPR, IFN_COND_SHL) \
- T (RSHIFT_EXPR, IFN_COND_SHR) \
- T (NEGATE_EXPR, IFN_COND_NEG)
+ T (PLUS_EXPR, ADD) \
+ T (MINUS_EXPR, SUB) \
+ T (MULT_EXPR, MUL) \
+ T (TRUNC_DIV_EXPR, DIV) \
+ T (TRUNC_MOD_EXPR, MOD) \
+ T (RDIV_EXPR, RDIV) \
+ T (MIN_EXPR, MIN) \
+ T (MAX_EXPR, MAX) \
+ T (BIT_AND_EXPR, AND) \
+ T (BIT_IOR_EXPR, IOR) \
+ T (BIT_XOR_EXPR, XOR) \
+ T (LSHIFT_EXPR, SHL) \
+ T (RSHIFT_EXPR, SHR) \
+ T (NEGATE_EXPR, NEG)
/* Return a function that only performs CODE when a certain condition is met
and that uses a given fallback value otherwise. For example, if CODE is
{
switch (code)
{
-#define CASE(CODE, IFN) case CODE: return IFN;
+#define CASE(CODE, IFN) case CODE: return IFN_COND_##IFN;
FOR_EACH_CODE_MAPPING(CASE)
#undef CASE
default:
{
switch (ifn)
{
-#define CASE(CODE, IFN) case IFN: return CODE;
+#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE;
FOR_EACH_CODE_MAPPING(CASE)
#undef CASE
default:
}
}
+/* Like get_conditional_internal_fn, but return a function that
+ additionally restricts the operation to the leading elements
+ of a vector. The number of elements to process is given by a length
+   and bias pair, as for IFN_LEN_LOAD.  The values of the remaining
+ elements are taken from the fallback ("else") argument.
+
+ For example, if CODE is a binary operation associated with FN:
+
+ LHS = FN (COND, A, B, ELSE, LEN, BIAS)
+
+ is equivalent to the C code:
+
+ for (int i = 0; i < NUNITS; i++)
+ {
+ if (i < LEN + BIAS && COND[i])
+ LHS[i] = A[i] CODE B[i];
+ else
+ LHS[i] = ELSE[i];
+ }
+*/
+
+internal_fn
+get_conditional_len_internal_fn (tree_code code)
+{
+ switch (code)
+ {
+#define CASE(CODE, IFN) case CODE: return IFN_COND_LEN_##IFN;
+ FOR_EACH_CODE_MAPPING(CASE)
+#undef CASE
+ default:
+ return IFN_LAST;
+ }
+}
+
/* Invoke T(IFN) for each internal function IFN that also has an
IFN_COND_* form. */
#define FOR_EACH_COND_FN_PAIR(T) \
int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
+ vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
+ internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
/* If operating on inactive elements could generate spurious traps,
we need to restrict the operation to active lanes. Note that this
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& mask_out_inactive)
{
- if (cond_fn == IFN_LAST
- || !direct_internal_fn_supported_p (cond_fn, vectype,
- OPTIMIZE_FOR_SPEED))
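+      /* Prefer a masked conditional operation; if the target only provides
+	 a length-controlled form, record a loop length instead.  */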
+ if (cond_fn != IFN_LAST
+ && direct_internal_fn_supported_p (cond_fn, vectype,
+ OPTIMIZE_FOR_SPEED))
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype, NULL);
+ else if (cond_len_fn != IFN_LAST
+ && direct_internal_fn_supported_p (cond_len_fn, vectype,
+ OPTIMIZE_FOR_SPEED))
+ vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
+ 1);
+ else
{
if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because no"
			     " conditional operation is available.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else
- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
- vectype, NULL);
}
/* Put types on constant and invariant SLP children. */
"transform binary/unary operation.\n");
bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
/* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
vectors with unsigned elements, but the result is signed. So, we
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
- else if (masked_loop_p && mask_out_inactive)
+ else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
{
- tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype, i);
- auto_vec<tree> vops (5);
+ tree mask;
+ if (masked_loop_p)
+ mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
+ vec_num * ncopies, vectype, i);
+ else
+ /* Dummy mask. */
+ mask = build_minus_one_cst (truth_type_for (vectype));
+ auto_vec<tree> vops (6);
vops.quick_push (mask);
vops.quick_push (vop0);
if (vop1)
(cond_fn, vectype, vops.length () - 1, &vops[1]);
vops.quick_push (else_value);
}
- gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
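+	  /* For length-controlled loops, the IFN_COND_LEN_* call takes the
+	     loop length and the target's partial load/store bias as its
+	     final two arguments.  */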
+ if (len_loop_p)
+ {
+ tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
+ vec_num * ncopies, vectype, i, 1);
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ tree bias = build_int_cst (intQI_type_node, biasval);
+ vops.quick_push (len);
+ vops.quick_push (bias);
+ }
+ gcall *call
+ = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
+ : cond_len_fn,
+ vops);
new_temp = make_ssa_name (vec_dest, call);
gimple_call_set_lhs (call, new_temp);
gimple_call_set_nothrow (call, true);