--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* } } } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (max, x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[33] = 0;
+ int res = foo (max, x);
+ if (res != 33)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ int res = foo (max, x);
+ if (res != max)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (int *x)
+{
+ unsigned long i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < 253; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
--- /dev/null
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (int *x)
+{
+ unsigned int i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < 253; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < 253; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree vector_iters_vf_type = uncounted_p ? sizetype
- : TREE_TYPE (vector_iters_vf);
- tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
- tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+ tree tmp_niters_vf
+ = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
&& get_loop_exit_edges (loop).length () == 1))
as an unsigned integer, where MAX_NITERS is the maximum number of
loop header iterations for the original scalar form of LOOP_VINFO. */
-static unsigned
+unsigned
vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ tree s_induc_type = signed_type_for (induc_type);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, s_induc_type,
+ gimple_convert (&iv_stmts, s_induc_type,
+ induc_def),
+ build_zero_cst (s_induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */
return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing;
}
+/* Determine the type to use for early break vectorization's scalar IV. If
+ no type is possible return false. */
+
+static bool
+vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo)
+{
+ /* Check if we have a usable scalar IV type for vectorization. */
+ tree iters_vf_type = sizetype;
+ if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
+ {
+ /* Find the type with the minimum precision we can use
+ for the scalar IV. */
+ tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+
+ /* Work out how many bits we need to represent the limit. */
+ unsigned int min_ni_width
+ = vect_min_prec_for_max_niters (loop_vinfo, 1);
+
+ /* Check if we're using PFA, if so we need a signed IV and an
+ extra bit for the sign. */
+ if (TYPE_UNSIGNED (cand_type)
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ min_ni_width += 1;
+
+ if (TYPE_PRECISION (cand_type) >= min_ni_width)
+ iters_vf_type = unsigned_type_for (cand_type);
+ else
+ {
+ opt_scalar_int_mode cmp_mode_iter;
+ tree iv_type = NULL_TREE;
+ FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
+ {
+ auto cmp_mode = cmp_mode_iter.require ();
+ unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
+ if (cmp_bits >= min_ni_width
+ && targetm.scalar_mode_supported_p (cmp_mode))
+ {
+ iv_type = build_nonstandard_integer_type (cmp_bits, true);
+ if (iv_type)
+ break;
+ }
+ }
+
+ if (!iv_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize early exit because the "
+                         "target doesn't support a scalar type wide "
+                         "enough to hold niters.\n");
+ return false;
+ }
+ iters_vf_type = iv_type;
+ }
+ }
+
+ LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type;
+ return true;
+}
+
/* Check to see if the current early break given in STMT_INFO is valid for
vectorization. */
vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
}
+ if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo))
+ return false;
+
return true;
}
inside the relavent exit blocks in order to adjust for early break. */
tree early_break_niters_var;
+ /* The type of the variable to be used to create the scalar IV for early break
+ loops. */
+ tree early_break_iv_type;
+
/* Record statements that are needed to be live for early break vectorization
but may not have an LC PHI node materialized yet in the exits. */
auto_vec<stmt_vec_info> early_break_live_ivs;
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
+#define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree);
extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
+extern unsigned vect_min_prec_for_max_niters (loop_vec_info, unsigned int);
 
/* Drive for loop transformation stage. */
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
struct vect_loop_form_info