--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do run } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_long } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits with different results.  */
+ {
+ if (a[i] < b[i])
+ return 0; /* First differing element is smaller in a.  */
+ if (a[i] > b[i])
+ return 1; /* First differing element is larger in a.  */
+ }
+ return 1; /* All n elements compared equal.  */
+}
+
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits that both yield 1.  */
+ {
+ if (a[i] < b[i])
+ return 1; /* a[i] != b[i] (smaller).  */
+ if (a[i] > b[i])
+ return 1; /* a[i] != b[i] (larger).  */
+ }
+ return 0; /* All n elements compared equal.  */
+}
+
+int main (void) /* Runtime check of f1 and f2 on a 3-element input.  */
+{
+ check_vect (); /* Declared in tree-vect.h; presumably checks for vector support -- confirm.  */
+
+ static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+ static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+ if (f1 (a, b, 3) != 1) /* a[0] = 10 > b[0] = 9, so f1 must return 1 at i == 0.  */
+ __builtin_abort ();
+
+ if (f2 (a, b, 3) != 1) /* a[0] != b[0], so f2 must return 1 at i == 0.  */
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++) /* Induction variable is narrower than the int result.  */
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* unsigned short index converted to the short return type.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void) /* Returns the first a[i] that exceeds b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return a[i]; /* Result is the loaded element from the exiting iteration.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-march=armv8-a+sve" { target { aarch64*-*-* } } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void) /* Returns 1 if a[k] > b[k] for some k examined by the loop, else 0.  */
+{
+ for (int i = 0; i < (N / 2); i += 2) /* Two elements per iteration; indices 0 .. 499 for N == 1000.  */
+ {
+ if (a[i] > b[i])
+ return 1; /* First early exit (even index).  */
+
+ if (a[i + 1] > b[i + 1])
+ return 1; /* Second early exit (odd index).  */
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" { target { aarch64*-*-* } } } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+ for (int i = 0; i < n; i++) /* Element-wise c = a + b with an index-based early exit.  */
+ {
+ c[i] = a[i] + b[i]; /* Store precedes the exit test, so the exiting iteration writes c[i] too.  */
+ if (i > 1000)
+ break; /* Taken at i == 1001 unless n is reached first.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
--- /dev/null
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+int foo (void) /* Returns 1 if some a[i] exceeds both b[i] and c[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i] && a[i] > c[i]) /* Short-circuit: c[i] is only compared when a[i] > b[i].  */
+ return 1;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" { xfail *-*-* } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+** ...
+** ldr q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+** ldr q[0-9]+, \[x[0-9]+, 16\]
+** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** fmov x[0-9]+, d[0-9]+
+** cmp x[0-9]+, 0
+** cset w0, ne
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** ldr q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+** ldr q[0-9]+, \[x[0-9]+, 16\]
+** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** fmov x[0-9]+, d[0-9]+
+** cmp x[0-9]+, 0
+** cset w0, ne
+** ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+** ...
+** cmhi v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits with different results.  */
+ {
+ if (a[i] < b[i])
+ return 0; /* First differing element is smaller in a.  */
+ if (a[i] > b[i])
+ return 1; /* First differing element is larger in a.  */
+ }
+ return 1; /* All n elements compared equal.  */
+}
+
+/*
+** f2:
+** ...
+** cmhi v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits that both yield 1.  */
+ {
+ if (a[i] < b[i])
+ return 1; /* a[i] != b[i] (smaller).  */
+ if (a[i] > b[i])
+ return 1; /* a[i] != b[i] (larger).  */
+ }
+ return 0; /* All n elements compared equal.  */
+}
+
+int main (void) /* Driver for f1/f2; this file is dg-do compile, so it is not executed.  */
+{
+ static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+ static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+ if (f1 (a, b, 3) != 1) /* a[0] = 10 > b[0] = 9, so f1 returns 1 at i == 0.  */
+ __builtin_abort ();
+
+ if (f2 (a, b, 3) != 1) /* a[0] != b[0], so f2 returns 1 at i == 0.  */
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** ldr q[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldr q[0-9]+, \[x[0-9]+, x[0-9]+\]
+** add x[0-9]+, x[0-9]+, 16
+** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** fmov x[0-9]+, d[0-9]+
+** cbz x[0-9]+, \.L[0-9]+
+** mov w0, 1
+** ret
+** mov w0, 0
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldr w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+** ldr w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldrsh w[0-9]+, \[[^\n]+\]
+** ldrsh w[0-9]+, \[[^\n]+\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++) /* Induction variable is narrower than the int result.  */
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldrsh w[0-9]+, \[[^\n]+\]
+** ldrsh w[0-9]+, \[[^\n]+\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* unsigned short index converted to the short return type.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldrsh w[0-9]+, \[[^\n]+\]
+** ldrsh w[0-9]+, \[[^\n]+\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns the first a[i] that exceeds b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return a[i]; /* Result is the loaded element from the exiting iteration.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** add x[0-9]+, x[0-9]+, 4000
+** add x[0-9]+, x[0-9]+, 2000
+** b \.L[0-9]+
+** ldr w[0-9]+, \[x[0-9]+[^\n]*\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** mov w0, 1
+** ret
+** mov w0, 0
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 if a[k] > b[k] for some k examined by the loop, else 0.  */
+{
+ for (int i = 0; i < (N / 2); i += 2) /* Two elements per iteration; indices 0 .. 499 for N == 1000.  */
+ {
+ if (a[i] > b[i])
+ return 1; /* First early exit (even index).  */
+
+ if (a[i + 1] > b[i + 1])
+ return 1; /* Second early exit (odd index).  */
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+** ...
+** cmeq v[0-9]+\.4s, v[0-9]+\.4s, #0
+** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+** ldr q[0-9]+, \[x[0-9]+[^\n]*\]
+** ...
+** str q[0-9]+, \[x[0-9]+[^\n]*\]
+** ...
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** add w[0-9]+, w[0-9]+, w[0-9]+
+** str w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ...
+** ret
+** ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+ for (int i = 0; i < n; i++) /* Element-wise c = a + b with an index-based early exit.  */
+ {
+ c[i] = a[i] + b[i]; /* Store precedes the exit test, so the exiting iteration writes c[i] too.  */
+ if (i > 1000)
+ break; /* Taken at i == 1001 unless n is reached first.  */
+ }
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+** ...
+** add x[0-9]+, x[0-9]+, 4000
+** add x[0-9]+, x[0-9]+, 3648
+** mov x0, 0
+** b \.L[0-9]+
+** ...
+** ldr w[0-9]+, \[x[0-9]+[^\n]*\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** mov w0, 1
+** ret
+** mov w0, 0
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 if some a[i] exceeds both b[i] and c[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i] && a[i] > c[i]) /* Short-circuit: c[i] is only compared when a[i] > b[i].  */
+ return 1;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+** ...
+** ldp q[0-9]+, q[0-9]+, \[x[0-9]+\]
+** cmplt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+** ptest p[0-9]+, p[0-9]+\.b
+** cset w0, any
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** ldp q[0-9]+, q[0-9]+, \[x[0-9]+\]
+** cmplt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+** ptest p[0-9]+, p[0-9]+\.b
+** cset w0, any
+** ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+** ...
+** whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+** ...
+** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+** cmplo p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+** ...
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ...
+** cmp x[0-9]+, x[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits with different results.  */
+ {
+ if (a[i] < b[i])
+ return 0; /* First differing element is smaller in a.  */
+ if (a[i] > b[i])
+ return 1; /* First differing element is larger in a.  */
+ }
+ return 1; /* All n elements compared equal.  */
+}
+
+/*
+** f2:
+** ...
+** whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+** ...
+** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+** cmplo p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+** ...
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+** ...
+** cmp x[0-9]+, x[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+ for (int i = 0; i < n; ++i) /* Two early exits that both yield 1.  */
+ {
+ if (a[i] < b[i])
+ return 1; /* a[i] != b[i] (smaller).  */
+ if (a[i] > b[i])
+ return 1; /* a[i] != b[i] (larger).  */
+ }
+ return 0; /* All n elements compared equal.  */
+}
+
+int main (void) /* Driver for f1/f2; this file is dg-do compile, so it is not executed.  */
+{
+ static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+ static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+ if (f1 (a, b, 3) != 1) /* a[0] = 10 > b[0] = 9, so f1 returns 1 at i == 0.  */
+ __builtin_abort ();
+
+ if (f2 (a, b, 3) != 1) /* a[0] != b[0], so f2 returns 1 at i == 0.  */
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+** ...
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+** b\.none \.L[0-9]+
+** mov w0, 1
+** ret
+** mov w0, 0
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 at the first i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return 1; /* Early exit with a constant result.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+** ...
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+** ...
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** ld1h z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** ld1h z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+** ...
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++) /* Induction variable is narrower than the int result.  */
+ {
+ if (a[i] > b[i])
+ return i; /* Early exit whose result depends on the exiting iteration.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+** ...
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns the first index i in [0, N) with a[i] > b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return i; /* unsigned short index converted to the short return type.  */
+ }
+ return 0; /* No hit; same value as a hit at index 0.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+** ...
+** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+** ...
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void) /* Returns the first a[i] that exceeds b[i], else 0.  */
+{
+ for (unsigned short i = 0; i < N; i++)
+ {
+ if (a[i] > b[i])
+ return a[i]; /* Result is the loaded element from the exiting iteration.  */
+ }
+ return 0; /* No element of a exceeded its counterpart in b.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+** ...
+** ld2w \{z[0-9]+\.s - z[0-9]+\.s\}, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ...
+** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+** ptest p[0-9]+, p[0-9]+\.b
+** ...
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmp w[0-9]+, w[0-9]+
+** ...
+** ret
+** ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 if a[k] > b[k] for some k examined by the loop, else 0.  */
+{
+ for (int i = 0; i < (N / 2); i += 2) /* Two elements per iteration; indices 0 .. 499 for N == 1000.  */
+ {
+ if (a[i] > b[i])
+ return 1; /* First early exit (even index).  */
+
+ if (a[i + 1] > b[i + 1])
+ return 1; /* Second early exit (odd index).  */
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+** ...
+** cmpeq p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #1
+** ...
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** add z[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s
+** st1w z[0-9]+\.s, p[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ...
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** add w[0-9]+, w[0-9]+, w[0-9]+
+** str w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+** ...
+** ret
+** ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+ for (int i = 0; i < n; i++) /* Element-wise c = a + b with an index-based early exit.  */
+ {
+ c[i] = a[i] + b[i]; /* Store precedes the exit test, so the exiting iteration writes c[i] too.  */
+ if (i > 1000)
+ break; /* Taken at i == 1001 unless n is reached first.  */
+ }
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+** ...
+** add x[0-9]+, x[0-9]+, 4000
+** add x[0-9]+, x[0-9]+, 3648
+** mov x0, 0
+** b \.L[0-9]+
+** ...
+** ldr w[0-9]+, \[x[0-9]+[^\n]*\]
+** ...
+** cmp w[0-9]+, w[0-9]+
+** ...
+** mov w0, 1
+** ret
+** mov w0, 0
+** ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void) /* Returns 1 if some a[i] exceeds both b[i] and c[i], else 0.  */
+{
+ for (int i = 0; i < N; i++)
+ {
+ if (a[i] > b[i] && a[i] > c[i]) /* Short-circuit: c[i] is only compared when a[i] > b[i].  */
+ return 1;
+ }
+ return 0;
+}
dest_bb->index);
LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+ /* If the loop has a side-effect (stores), force a scalar epilogue. */
+ for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
+ if (DR_IS_WRITE (dr))
+ {
+ LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
+ break;
+ }
if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
{
/* For early breaks the scalar loop needs to execute at most VF times
to find the element that caused the break. */
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
bound_epilog = vf;
bool epilog_peeling = maybe_ne (bound_epilog, 0U);
poly_uint64 bound_scalar = bound_epilog;
+ if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "early break does not require epilog.\n");
+
if (!prolog_peeling && !epilog_peeling)
return NULL;
/* Peel prolog and put it on preheader edge of loop. */
edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
edge prolog_e = NULL;
+ bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
scalar_loop, scalar_e,
e, &prolog_e, true, NULL,
uncounted_p, uncounted_p,
- true);
+ early_break_peel_p);
gcc_assert (prolog);
prolog->force_vectorize = false;
edge epilog_e = vect_epilogues ? e : scalar_e;
edge new_epilog_e = NULL;
auto_vec<basic_block> doms;
+ bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
epilog
= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
&new_epilog_e, true, &doms,
uncounted_p, false,
- true);
+ early_break_peel_p);
LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
gcc_assert (epilog);
/* Handle any remaining dominator updates needed after
inserting the loop skip edge above. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
&& prolog_peeling)
{
/* Adding a skip edge to skip a loop with multiple exits
= make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
- && get_loop_exit_edges (loop).length () == 1))
+ && get_loop_exit_edges (loop).length () == 1)
+ && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
{
basic_block exit_bb = NULL;
edge update_e = NULL;
drs_advanced_by (NULL_TREE),
vec_loop_main_exit (NULL),
vec_epilogue_loop_main_exit (NULL),
- scalar_loop_main_exit (NULL)
+ scalar_loop_main_exit (NULL),
+ early_break_needs_epilogue (false),
+ early_break_niters_var (NULL)
{
/* CHECKME: We want to visit all BBs before their successors (except for
latch blocks, for which this assertion wouldn't hold). In the simple
LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
= !LOOP_VINFO_LOOP_CONDS (loop_vinfo).is_empty ();
+ /* At the moment we can't support omitting the epilog for multiple exits, as
+ the result of the first compare should be masked by that of the second. We
+ can only allow it if the early exits have the same live values. For
+ differing values we have to calculate a third mask to disambiguate. */
+ LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
+ = LOOP_VINFO_LOOP_CONDS (loop_vinfo).length () > 1;
+
if (info->inner_loop_cond)
{
/* If we have an estimate on the number of iterations of the inner
{
DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks");
- if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ /* If no peeling was done then we have no IV to update. */
+ || !LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo))
return;
- gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo));
-
tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo);
tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
*live_p = true;
+ LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
}
}
}
For counted loops, this IV controls the natural exits of the loop. */
edge scalar_loop_main_exit;
+ /* Indicate if the multiple exit loop has any side-effects that require it to
+ have a scalar epilogue. */
+ bool early_break_needs_epilogue;
+
/* Used to store the list of stores needing to be moved if doing early
break vectorization as they would violate the scalar loop semantics if
vectorized in their current location. These are stored in order that they
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
#define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
+#define LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG(L) (L)->early_break_needs_epilogue
#define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
((single_pred ((L)->loop->latch) != (L)->vec_loop_main_exit->src) \