git.ipfire.org Git - thirdparty/gcc.git/commitdiff
vect: Don't generate scalar epilogue if not needed [PR120352]
author Tamar Christina <tamar.christina@arm.com>
Wed, 27 May 2026 09:53:07 +0000 (10:53 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 27 May 2026 09:54:13 +0000 (10:54 +0100)
The example loop

#define N 4
int a[N] = {0,0,0,1};
int b[N] = {0,0,0,1};

__attribute__((noipa, noinline))
int foo ()
{
  for (int i = 0; i < N; i++)
    {
      if (a[i] > b[i])
        return 1;
    }
  return 0;
}

compiled with -O3 -march=armv9-a generates

foo:
        adrp    x2, .LANCHOR0
        add     x1, x2, :lo12:.LANCHOR0
        ptrue   p7.b, vl16
        mov     w0, 0
        ldr     q30, [x2, #:lo12:.LANCHOR0]
        ldr     q31, [x1, 16]
        cmpgt   p7.s, p7/z, z30.s, z31.s
        b.any   .L7
        ret
.L7:
        ldr     w2, [x2, #:lo12:.LANCHOR0]
        ldr     w0, [x1, 16]
        cmp     w2, w0
        bgt     .L4
        ldr     w0, [x1, 4]
        ldr     w2, [x1, 20]
        cmp     w2, w0
        blt     .L4
        ldr     w0, [x1, 8]
        ldr     w2, [x1, 24]
        cmp     w2, w0
        blt     .L4
        ldr     w2, [x1, 12]
        ldr     w0, [x1, 28]
        cmp     w2, w0
        cset    w0, gt
        ret
.L4:
        mov     w0, 1
        ret

Here, when the vector compare finds a matching element, we still branch to the
scalar code just to return 1.  Obviously the scalar code is completely unneeded.

This patch teaches the vectorizer that when

1. We have no live values
2. We only have one exit (this is a restriction that will be lifted in a later
   patch and is there because we need masking to avoid false positives, but see
   testcase vect-early-break-no-epilog_11.c)
3. The loop has no side-effects

then we don't need the scalar epilogue at all.

e.g. for the above we now generate

foo:
        adrp    x0, .LANCHOR0
        add     x0, x0, :lo12:.LANCHOR0
        ptrue   p7.s, vl4
        ldp     q31, q30, [x0]
        cmplt   p15.s, p7/z, z30.s, z31.s
        cset    w0, any
        ret

gcc/ChangeLog:

PR tree-optimization/120352
* tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG): New.
(class _loop_vec_info): Add early_break_needs_epilogue.
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Detect
usage of stores.
* tree-vect-loop-manip.cc (vect_do_peeling): Use them.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Likewise.
(vect_create_loop_vinfo): Likewise.
(vect_update_ivs_after_vectorizer_for_early_breaks): Likewise.
* tree-vect-stmts.cc (vect_stmt_relevant_p): Likewise.

gcc/testsuite/ChangeLog:

PR tree-optimization/120352
* gcc.dg/vect/vect-early-break-no-epilog_1.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_10.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_11.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_2.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_3.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_4.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_5.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_6.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_7.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_8.c: New test.
* gcc.dg/vect/vect-early-break-no-epilog_9.c: New test.
* gcc.target/aarch64/noeffect.c: New test.
* gcc.target/aarch64/noeffect10.c: New test.
* gcc.target/aarch64/noeffect11.c: New test.
* gcc.target/aarch64/noeffect2.c: New test.
* gcc.target/aarch64/noeffect3.c: New test.
* gcc.target/aarch64/noeffect4.c: New test.
* gcc.target/aarch64/noeffect5.c: New test.
* gcc.target/aarch64/noeffect6.c: New test.
* gcc.target/aarch64/noeffect7.c: New test.
* gcc.target/aarch64/noeffect8.c: New test.
* gcc.target/aarch64/noeffect9.c: New test.
* gcc.target/aarch64/sve/noeffect.c: New test.
* gcc.target/aarch64/sve/noeffect10.c: New test.
* gcc.target/aarch64/sve/noeffect11.c: New test.
* gcc.target/aarch64/sve/noeffect2.c: New test.
* gcc.target/aarch64/sve/noeffect3.c: New test.
* gcc.target/aarch64/sve/noeffect4.c: New test.
* gcc.target/aarch64/sve/noeffect5.c: New test.
* gcc.target/aarch64/sve/noeffect6.c: New test.
* gcc.target/aarch64/sve/noeffect7.c: New test.
* gcc.target/aarch64/sve/noeffect8.c: New test.
* gcc.target/aarch64/sve/noeffect9.c: New test.

38 files changed:
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect11.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/noeffect9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c [new file with mode: 0644]
gcc/tree-vect-data-refs.cc
gcc/tree-vect-loop-manip.cc
gcc/tree-vect-loop.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c
new file mode 100644 (file)
index 0000000..cf91783
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c
new file mode 100644 (file)
index 0000000..86b7531
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c
new file mode 100644 (file)
index 0000000..3a6b72f
--- /dev/null
@@ -0,0 +1,51 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do run } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_long } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c
new file mode 100644 (file)
index 0000000..6563cea
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c
new file mode 100644 (file)
index 0000000..dfb398d
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c
new file mode 100644 (file)
index 0000000..99bb66f
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c
new file mode 100644 (file)
index 0000000..ec3eec5
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c
new file mode 100644 (file)
index 0000000..46d6a89
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c
new file mode 100644 (file)
index 0000000..6d94312
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-march=armv8-a+sve" { target { aarch64*-*-* } } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" { target { aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c
new file mode 100644 (file)
index 0000000..3236cdb
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c
new file mode 100644 (file)
index 0000000..c788a68
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect.c b/gcc/testsuite/gcc.target/aarch64/noeffect.c
new file mode 100644 (file)
index 0000000..ba33299
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+**     ldr     q[0-9]+, \[x[0-9]+, 16\]
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cmp     x[0-9]+, 0
+**     cset    w0, ne
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/noeffect10.c
new file mode 100644 (file)
index 0000000..03f3e48
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+**     ldr     q[0-9]+, \[x[0-9]+, 16\]
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cmp     x[0-9]+, 0
+**     cset    w0, ne
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/noeffect11.c
new file mode 100644 (file)
index 0000000..82c2f00
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**     ...
+**     cmhi    v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+**     ...
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+/*
+** f2:
+**     ...
+**     cmhi    v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+**     ...
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/noeffect2.c
new file mode 100644 (file)
index 0000000..08c531f
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     q[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     add     x[0-9]+, x[0-9]+, 16
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cbz     x[0-9]+, \.L[0-9]+
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/noeffect3.c
new file mode 100644 (file)
index 0000000..886ad7b
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/noeffect4.c
new file mode 100644 (file)
index 0000000..276843c
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/noeffect5.c
new file mode 100644 (file)
index 0000000..c15e52e
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/noeffect6.c
new file mode 100644 (file)
index 0000000..9da4f49
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/noeffect7.c
new file mode 100644 (file)
index 0000000..16fc921
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 2000
+**     b       \.L[0-9]+
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/noeffect8.c
new file mode 100644 (file)
index 0000000..ada7939
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+**     ...
+**     cmeq    v[0-9]+\.4s, v[0-9]+\.4s, #0
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     str     q[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     add     w[0-9]+, w[0-9]+, w[0-9]+
+**     str     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     ret
+**     ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/noeffect9.c
new file mode 100644 (file)
index 0000000..0ce0380
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 3648
+**     mov     x0, 0
+**     b       \.L[0-9]+
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c
new file mode 100644 (file)
index 0000000..f7109b1
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+**     ...
+**     ldp     q[0-9]+, q[0-9]+, \[x[0-9]+\]
+**     cmplt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     cset    w0, any
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c
new file mode 100644 (file)
index 0000000..39ab948
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldp     q[0-9]+, q[0-9]+, \[x[0-9]+\]
+**     cmplt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     cset    w0, any
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c
new file mode 100644 (file)
index 0000000..5c8c356
--- /dev/null
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**     ...
+**     whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+**     ...
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     cmplo   p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ...
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+/*
+** f2:
+**     ...
+**     whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+**     ...
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     cmplo   p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ...
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c
new file mode 100644 (file)
index 0000000..5bb1bad
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     b\.none \.L[0-9]+
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c
new file mode 100644 (file)
index 0000000..c5f81bb
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c
new file mode 100644 (file)
index 0000000..735b545
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c
new file mode 100644 (file)
index 0000000..bfaec58
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c
new file mode 100644 (file)
index 0000000..1443883
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c
new file mode 100644 (file)
index 0000000..9dab90e
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld2w    \{z[0-9]+\.s - z[0-9]+\.s\}, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c
new file mode 100644 (file)
index 0000000..7997988
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+**     ...
+**     cmpeq   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #1
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     add     z[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s
+**     st1w    z[0-9]+\.s, p[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     add     w[0-9]+, w[0-9]+, w[0-9]+
+**     str     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     ret
+**     ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c
new file mode 100644 (file)
index 0000000..bfc5ed7
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 3648
+**     mov     x0, 0
+**     b       \.L[0-9]+
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
index da65f1d652cf005ec87c934676761481b527a855..03ac4c141d088f998a1cf6cc992e68798b600d69 100644 (file)
@@ -889,6 +889,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
                             dest_bb->index);
 
   LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+  /* If the loop has a side-effect (stores), force a scalar epilogue.  */
+  for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
+    if (DR_IS_WRITE (dr))
+      {
+       LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
+       break;
+      }
 
   if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
     {
index 3aae0dea25b0d8e1ac37edad5b15b4550814a3f1..9653ad43e0d3d105d8616de56be59a0c60f3283f 100644 (file)
@@ -3306,12 +3306,17 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 
   /* For early breaks the scalar loop needs to execute at most VF times
      to find the element that caused the break.  */
-  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+      && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
     bound_epilog = vf;
 
   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
   poly_uint64 bound_scalar = bound_epilog;
 
+  if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "early break does not require epilog.\n");
+
   if (!prolog_peeling && !epilog_peeling)
     return NULL;
 
@@ -3501,11 +3506,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       /* Peel prolog and put it on preheader edge of loop.  */
       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
       edge prolog_e = NULL;
+      bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
                                                       scalar_loop, scalar_e,
                                                       e, &prolog_e, true, NULL,
                                                       uncounted_p, uncounted_p,
-                                                      true);
+                                                      early_break_peel_p);
 
       gcc_assert (prolog);
       prolog->force_vectorize = false;
@@ -3617,11 +3623,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       edge epilog_e = vect_epilogues ? e : scalar_e;
       edge new_epilog_e = NULL;
       auto_vec<basic_block> doms;
+      bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
       epilog
        = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
                                                  &new_epilog_e, true, &doms,
                                                  uncounted_p, false,
-                                                 true);
+                                                 early_break_peel_p);
 
       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
       gcc_assert (epilog);
@@ -3671,6 +3678,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
          /* Handle any remaining dominator updates needed after
             inserting the loop skip edge above.  */
          if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+             && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
              && prolog_peeling)
            {
              /* Adding a skip edge to skip a loop with multiple exits
@@ -3818,7 +3826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
            = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
 
          if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
-               && get_loop_exit_edges (loop).length () == 1))
+               && get_loop_exit_edges (loop).length () == 1)
+             && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
          {
            basic_block exit_bb = NULL;
            edge update_e = NULL;
index ac7e08cf205c058cda93bddd3929acb022ec60cd..dded8b9aabffc49a59434d29f8d2bda90613fc51 100644 (file)
@@ -775,7 +775,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     drs_advanced_by (NULL_TREE),
     vec_loop_main_exit (NULL),
     vec_epilogue_loop_main_exit (NULL),
-    scalar_loop_main_exit (NULL)
+    scalar_loop_main_exit (NULL),
+    early_break_needs_epilogue (false),
+    early_break_niters_var (NULL)
 {
   /* CHECKME: We want to visit all BBs before their successors (except for
      latch blocks, for which this assertion wouldn't hold).  In the simple
@@ -1705,6 +1707,13 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
   LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
     = !LOOP_VINFO_LOOP_CONDS (loop_vinfo).is_empty ();
 
+  /* At the moment we can't omit the epilog for multiple exits, as the result
+     of the first compare should be masked by that of the second.  We can only
+     allow it if the early exits have the same live values.  For differing
+     values we have to calculate a third mask to disambiguate.  */
+  LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
+    = LOOP_VINFO_LOOP_CONDS (loop_vinfo).length () > 1;
+
   if (info->inner_loop_cond)
     {
       /* If we have an estimate on the number of iterations of the inner
@@ -11058,11 +11067,11 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
 {
   DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks");
 
-  if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+  if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+      /* If no peeling was done then we have no IV to update.  */
+      || !LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo))
     return;
 
-  gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo));
-
   tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo);
   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
index 09ee794300bee2d4ea687f71908800f87060cd50..4c9d871a31b85e206d03df123f34236689547871 100644 (file)
@@ -413,6 +413,7 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
 
               *live_p = true;
+             LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
            }
        }
     }
index 6d7393809013915a767985fa6a0c95e77e830743..b8a287825f43275ae06ac697a9bbf16ccc178c27 100644 (file)
@@ -1241,6 +1241,10 @@ public:
      For counted loops, this IV controls the natural exits of the loop.  */
   edge scalar_loop_main_exit;
 
+  /* True if the early break loop has side-effects, live values or multiple
+     early exits, any of which require it to have a scalar epilogue.  */
+  bool early_break_needs_epilogue;
+
   /* Used to store the list of stores needing to be moved if doing early
      break vectorization as they would violate the scalar loop semantics if
      vectorized in their current location.  These are stored in order that they
@@ -1325,6 +1329,7 @@ public:
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
 #define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
+#define LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG(L) (L)->early_break_needs_epilogue
 #define LOOP_VINFO_EARLY_BRK_STORES(L)     (L)->early_break_stores
 #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L)  \
   ((single_pred ((L)->loop->latch) != (L)->vec_loop_main_exit->src) \