--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* } } } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (max, x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[33] = 0;
+ int res = foo (max, x);
+ if (res != 33)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ int res = foo (max, x);
+ if (res != max)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (int *x)
+{
+ unsigned long i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < 253; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (int *x)
+{
+ unsigned int i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < 253; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < 253; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree vector_iters_vf_type = uncounted_p ? sizetype
- : TREE_TYPE (vector_iters_vf);
- tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
- tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+ tree tmp_niters_vf
+ = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
&& get_loop_exit_edges (loop).length () == 1))
as an unsigned integer, where MAX_NITERS is the maximum number of
loop header iterations for the original scalar form of LOOP_VINFO. */
-static unsigned
+unsigned
vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ tree s_induc_type = signed_type_for (induc_type);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, s_induc_type,
+ gimple_convert (&iv_stmts, s_induc_type,
+ induc_def),
+ build_zero_cst (s_induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */
return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing;
}
+/* Determine the type to use for early break vectorization's scalar IV. If
+ no type is possible return false. */
+
+static bool
+vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo)
+{
+ /* Check if we have a usable scalar IV type for vectorization. */
+ tree iters_vf_type = sizetype;
+ if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
+ {
+ /* Find the type with the minimum precision we can use
+ for the scalar IV. */
+ tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+
+ /* Work out how many bits we need to represent the limit. */
+ unsigned int min_ni_width
+ = vect_min_prec_for_max_niters (loop_vinfo, 1);
+
+ /* Check if we're using PFA, if so we need a signed IV and an
+ extra bit for the sign. */
+ if (TYPE_UNSIGNED (cand_type)
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ min_ni_width += 1;
+
+ if (TYPE_PRECISION (cand_type) >= min_ni_width)
+ iters_vf_type = unsigned_type_for (cand_type);
+ else
+ {
+ opt_scalar_int_mode cmp_mode_iter;
+ tree iv_type = NULL_TREE;
+ FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
+ {
+ auto cmp_mode = cmp_mode_iter.require ();
+ unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
+ if (cmp_bits >= min_ni_width
+ && targetm.scalar_mode_supported_p (cmp_mode))
+ {
+ iv_type = build_nonstandard_integer_type (cmp_bits, true);
+ if (iv_type)
+ break;
+ }
+ }
+
+ if (!iv_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize early exit because the "
+                         "target doesn't support a scalar type wide "
+                         "enough to hold niters.\n");
+ return false;
+ }
+ iters_vf_type = iv_type;
+ }
+ }
+
+ LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type;
+ return true;
+}
+
/* Check to see if the current early break given in STMT_INFO is valid for
vectorization. */
vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
}
+ if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo))
+ return false;
+
return true;
}
inside the relavent exit blocks in order to adjust for early break. */
tree early_break_niters_var;
+ /* The type of the variable to be used to create the scalar IV for early break
+ loops. */
+ tree early_break_iv_type;
+
/* Record statements that are needed to be live for early break vectorization
but may not have an LC PHI node materialized yet in the exits. */
auto_vec<stmt_vec_info> early_break_live_ivs;
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
+#define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree);
extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
+extern unsigned vect_min_prec_for_max_niters (loop_vec_info, unsigned int);
 
/* Drive for loop transformation stage. */
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
struct vect_loop_form_info