--- /dev/null
+/* Test vectorization of AND and IOR reductions into a bool result,
+   with mask elements produced from comparisons of char loads.
+   Runs all trip counts 0..76 and flips a single element to check
+   the reduction observes every lane.  */
+#include "tree-vect.h"
+
+char p[128];
+
+/* All-of reduction: true iff p[0..n-1] are all nonzero.
+   noipa keeps the loop from being folded at the call sites.  */
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* Any-of reduction: true iff some p[0..n-1] is nonzero.  */
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ abort ();
+
+ /* A single zero element must make the all-of reduction false.  */
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ abort ();
+
+ /* A single nonzero element must make the any-of reduction true.  */
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char variant, but with short elements: AND/IOR reductions of
+   bools derived from comparisons, over all trip counts 0..76.  */
+#include "tree-vect.h"
+
+short p[128];
+
+/* All-of reduction: true iff p[0..n-1] are all nonzero.  */
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* Any-of reduction: true iff some p[0..n-1] is nonzero.  */
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ abort ();
+
+ /* A single zero element must make the all-of reduction false.  */
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ abort ();
+
+ /* A single nonzero element must make the any-of reduction true.  */
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char variant, but with int elements: AND/IOR reductions of
+   bools derived from comparisons, over all trip counts 0..76.  */
+#include "tree-vect.h"
+
+int p[128];
+
+/* All-of reduction: true iff p[0..n-1] are all nonzero.  */
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* Any-of reduction: true iff some p[0..n-1] is nonzero.  */
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ abort ();
+
+ /* A single zero element must make the all-of reduction false.  */
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ abort ();
+
+ /* A single nonzero element must make the any-of reduction true.  */
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char variant, but with long long elements: AND/IOR reductions
+   of bools derived from comparisons, over all trip counts 0..76.  */
+#include "tree-vect.h"
+
+long long p[128];
+
+/* All-of reduction: true iff p[0..n-1] are all nonzero.  */
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* Any-of reduction: true iff some p[0..n-1] is nonzero.  */
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ abort ();
+
+ /* A single zero element must make the all-of reduction false.  */
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ abort ();
+
+ /* A single nonzero element must make the any-of reduction true.  */
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* Test vectorization of XOR reductions into a bool result over char
+   elements.  The result is the parity of the number of nonzero
+   elements, optionally inverted by the initial value.  */
+#include "tree-vect.h"
+
+char p[128];
+
+/* Parity reduction starting from true: returns !parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* Parity reduction starting from false: returns parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ /* All elements nonzero: result depends on the parity of n.  */
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ abort ();
+
+ /* All elements zero: the initial value survives unchanged.  */
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char XOR variant, but with short elements: bool parity
+   reductions over comparisons, for all trip counts 0..76.  */
+#include "tree-vect.h"
+
+short p[128];
+
+/* Parity reduction starting from true: returns !parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* Parity reduction starting from false: returns parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ /* All elements nonzero: result depends on the parity of n.  */
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ abort ();
+
+ /* All elements zero: the initial value survives unchanged.  */
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char XOR variant, but with int elements: bool parity
+   reductions over comparisons, for all trip counts 0..76.  */
+#include "tree-vect.h"
+
+int p[128];
+
+/* Parity reduction starting from true: returns !parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* Parity reduction starting from false: returns parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ /* All elements nonzero: result depends on the parity of n.  */
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ abort ();
+
+ /* All elements zero: the initial value survives unchanged.  */
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
--- /dev/null
+/* As the char XOR variant, but with long long elements: bool parity
+   reductions over comparisons, for all trip counts 0..76.  */
+#include "tree-vect.h"
+
+long long p[128];
+
+/* Parity reduction starting from true: returns !parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* Parity reduction starting from false: returns parity-of-nonzero-count.  */
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ check_vect ();
+
+ /* All elements nonzero: result depends on the parity of n.  */
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ abort ();
+
+ /* All elements zero: the initial value survives unchanged.  */
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
}
}
+/* Set *SBOOL_FN to the corresponding function working on vector masks
+ for REDUC_FN. Return true if that exists, false otherwise.
+ Only the AND, IOR and XOR reductions have mask (sbool) counterparts;
+ for any other reduction *SBOOL_FN is left untouched.  */
+
+static bool
+sbool_reduction_fn_for_fn (internal_fn reduc_fn, internal_fn *sbool_fn)
+{
+ switch (reduc_fn)
+ {
+ case IFN_REDUC_AND:
+ *sbool_fn = IFN_REDUC_SBOOL_AND;
+ return true;
+ case IFN_REDUC_IOR:
+ *sbool_fn = IFN_REDUC_SBOOL_IOR;
+ return true;
+ case IFN_REDUC_XOR:
+ *sbool_fn = IFN_REDUC_SBOOL_XOR;
+ return true;
+ default:
+ return false;
+ }
+}
+
/* If there is a neutral value X such that a reduction would not be affected
by the introduction of additional X elements, return that X, otherwise
return null. CODE is the code of the reduction and SCALAR_TYPE is type
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
+ tree vector_elt_type = TREE_TYPE (vector_type);
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
gimple_seq ctor_seq = NULL;
if (neutral_op
- && !useless_type_conversion_p (TREE_TYPE (vector_type),
+ && !useless_type_conversion_p (vector_elt_type,
TREE_TYPE (neutral_op)))
- neutral_op = gimple_convert (&ctor_seq,
- TREE_TYPE (vector_type),
- neutral_op);
+ neutral_op = gimple_convert (&ctor_seq, vector_elt_type, neutral_op);
for (j = 0; j < nunits * number_of_vectors; ++j)
{
tree op;
op = neutral_op;
else
{
- if (!useless_type_conversion_p (TREE_TYPE (vector_type),
+ if (!useless_type_conversion_p (vector_elt_type,
TREE_TYPE (initial_values[i])))
- initial_values[i] = gimple_convert (&ctor_seq,
- TREE_TYPE (vector_type),
- initial_values[i]);
+ {
+ if (VECTOR_BOOLEAN_TYPE_P (vector_type))
+ initial_values[i] = gimple_build (&ctor_seq, COND_EXPR,
+ vector_elt_type,
+ initial_values[i],
+ build_all_ones_cst
+ (vector_elt_type),
+ build_zero_cst
+ (vector_elt_type));
+ else
+ initial_values[i] = gimple_convert (&ctor_seq,
+ vector_elt_type,
+ initial_values[i]);
+ }
op = initial_values[i];
}
/* Shouldn't be used beyond this point. */
exit_bb = nullptr;
+ /* If we are operating on a mask vector and do not support direct mask
+ reduction, work on a bool data vector instead of a mask vector. */
+ if (VECTOR_BOOLEAN_TYPE_P (vectype)
+ && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
+ && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
+ {
+ gcc_assert (reduc_inputs.length () == 1);
+ vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
+ gimple_seq stmts = NULL;
+ reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+ reduc_inputs[0],
+ build_one_cst (vectype),
+ build_zero_cst (vectype));
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+ }
+
if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn != IFN_LAST)
{
new_temp = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype1),
new_temp, bitsize, bitsize_zero_node);
- new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR,
- scalar_type, new_temp);
+ new_temp = gimple_convert (&stmts, scalar_type, new_temp);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
scalar_results.safe_push (new_temp);
}
tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out;
- /* We do not handle mask reductions correctly in the epilogue. */
- if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "mask reduction not supported.\n");
- return false;
- }
-
gimple_match_op op;
if (!gimple_extract_op (stmt_info->stmt, &op))
gcc_unreachable ();
return false;
}
+ /* See if we can convert a mask vector to a corresponding bool data vector
+ to perform the epilogue reduction. */
+ tree alt_vectype_out = NULL_TREE;
+ if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
+ {
+ alt_vectype_out
+ = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
+ TREE_TYPE (vectype_out),
+ TYPE_VECTOR_SUBPARTS
+ (vectype_out));
+ if (!alt_vectype_out
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (alt_vectype_out),
+ TYPE_VECTOR_SUBPARTS (vectype_out))
+ || !expand_vec_cond_expr_p (alt_vectype_out, vectype_out))
+ alt_vectype_out = NULL_TREE;
+ }
+
internal_fn reduc_fn = IFN_LAST;
if (reduction_type == TREE_CODE_REDUCTION
|| reduction_type == FOLD_LEFT_REDUCTION
? fold_left_reduction_fn (orig_code, &reduc_fn)
: reduction_fn_for_scalar_code (orig_code, &reduc_fn))
{
- if (reduc_fn != IFN_LAST
- && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
- OPTIMIZE_FOR_SPEED))
+ internal_fn sbool_fn = IFN_LAST;
+ if (reduc_fn == IFN_LAST)
+ ;
+ else if ((!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ || (GET_MODE_CLASS (TYPE_MODE (vectype_out))
+ == MODE_VECTOR_BOOL))
+ && direct_internal_fn_supported_p (reduc_fn, vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ ;
+ else if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ && sbool_reduction_fn_for_fn (reduc_fn, &sbool_fn)
+ && direct_internal_fn_supported_p (sbool_fn, vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ reduc_fn = sbool_fn;
+ else if (reduction_type != FOLD_LEFT_REDUCTION
+ && alt_vectype_out
+ && direct_internal_fn_supported_p (reduc_fn, alt_vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+ else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
+ if (reduc_fn == IFN_LAST
+ && VECTOR_BOOLEAN_TYPE_P (vectype_out))
+ {
+ if (!alt_vectype_out)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot turn mask into bool data vector for "
+ "reduction epilogue.\n");
+ return false;
+ }
+ VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+ }
}
else if (reduction_type == COND_REDUCTION)
{
/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
a vector mask type instead of a normal vector type. Record the
- result in STMT_INFO->mask_precision. */
+ result in STMT_INFO->mask_precision. Returns true when the
+ precision changed. */
-static void
+static bool
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
if (!possible_vector_mask_operation_p (stmt_info))
- return;
+ return false;
/* If at least one boolean input uses a vector mask type,
pick the mask type with the narrowest elements.
scalar_mode mode;
tree vectype, mask_type;
if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
- && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
- && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
+ /* Do not allow this to set vinfo->vector_mode, this might
+ disrupt the result for the next iteration. */
+ && (vectype = get_related_vectype_for_scalar_type
+ (vinfo->vector_mode, op0_type))
+ && (mask_type = truth_type_for (vectype))
&& expand_vec_cmp_expr_p (vectype, mask_type, code))
precision = GET_MODE_BITSIZE (mode);
}
}
}
- if (dump_enabled_p ())
+ if (stmt_info->mask_precision != precision)
{
- if (precision == ~0U)
- dump_printf_loc (MSG_NOTE, vect_location,
- "using normal nonmask vectors for %G",
- stmt_info->stmt);
- else
- dump_printf_loc (MSG_NOTE, vect_location,
- "using boolean precision %d for %G",
- precision, stmt_info->stmt);
- }
+ if (dump_enabled_p ())
+ {
+ if (precision == ~0U)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "using normal nonmask vectors for %G",
+ stmt_info->stmt);
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "using boolean precision %d for %G",
+ precision, stmt_info->stmt);
+ }
- stmt_info->mask_precision = precision;
+ /* ??? We'd like to assert stmt_info->mask_precision == 0
+ || stmt_info->mask_precision > precision, thus that we only
+ decrease mask precisions throughout iteration, but the
+ tcc_comparison handling above means for comparisons of bools
+ we start with 8 but might increase in case the bools get mask
+ precision on their own. */
+ stmt_info->mask_precision = precision;
+ return true;
+ }
+ return false;
}
/* Handle vect_determine_precisions for STMT_INFO, given that we
DUMP_VECT_SCOPE ("vect_determine_precisions");
- for (unsigned int i = 0; i < nbbs; i++)
+ /* For mask precisions we have to iterate since otherwise we do not
+ get reduction PHI precision correct. For now do this only for
+ loop vectorization. */
+ bool changed;
+ do
{
- basic_block bb = bbs[i];
- for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
- if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
- vect_determine_mask_precision (vinfo, stmt_info);
- }
- for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ changed = false;
+ for (unsigned int i = 0; i < nbbs; i++)
{
- stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
- if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
- vect_determine_mask_precision (vinfo, stmt_info);
+ basic_block bb = bbs[i];
+ for (auto gsi = gsi_start_phis (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
+ if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+ changed |= vect_determine_mask_precision (vinfo, stmt_info);
+ }
+ for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+ if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+ changed |= vect_determine_mask_precision (vinfo, stmt_info);
+ }
}
}
+ while (changed && is_a <loop_vec_info> (vinfo));
+
for (unsigned int i = 0; i < nbbs; i++)
{
basic_block bb = bbs[nbbs - i - 1];
/* The vector type for performing the actual reduction operation. */
tree reduc_vectype;
+ /* The vector type we should use for the final reduction in the epilogue
+ when we reduce a mask. */
+ tree reduc_vectype_for_mask;
+
/* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
tree induc_cond_initial_val;
#define VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL(I) ((I)->induc_cond_initial_val)
#define VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT(I) ((I)->reduc_epilogue_adjustment)
#define VECT_REDUC_INFO_VECTYPE(I) ((I)->reduc_vectype)
+#define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)->reduc_vectype_for_mask)
#define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
#define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)