Fix PEELING_FOR_NITERS calculation (PR 87288)

author rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)

committer rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)
author rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)
committer rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index af5fddf10ce8951667823df8fce2810f19509fef..f4002840a8cf3325c61c875bc457875fb21ddcdd 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+       PR tree-optimization/87288
+       * tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
+       into account when determining PEELING_FOR_NITERS.
+
  2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
  
         PR tree-optimization/86877
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index ade95dbb5c90c33122d25656a0407672fc3737da..c7c8e90b27c5670f7264285d75f3e2ef417201d7 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+       PR tree-optimization/87288
+       * gcc.dg/vect/pr87288-1.c: New test.
+       * gcc.dg/vect/pr87288-2.c: Likewise.
+       * gcc.dg/vect/pr87288-3.c: Likewise.
+
  2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
  
         PR tree-optimization/86877
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-1.c b/gcc/testsuite/gcc.dg/vect/pr87288-1.c

new file mode 100644 (file)

index 0000000..0d0a70d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-1.c
@@ -0,0 +1,49 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+void __attribute__ ((noipa))
+run (int *restrict a, int *restrict b, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    {
+      a[i * 2] = b[i * 2] + count;
+      a[i * 2 + 1] = count;
+    }
+}
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  for (int i = 0; i <= MAX_COUNT; ++i)
+    {
+      a[i * 2 * N] = 999;
+      run (a, b, i);
+      check (a, i);
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-2.c b/gcc/testsuite/gcc.dg/vect/pr87288-2.c

new file mode 100644 (file)

index 0000000..e9ff9a0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-2.c
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)                               \
+  void __attribute__ ((noipa))                         \
+  run_##COUNT (int *restrict a, int *restrict b)       \
+  {                                                    \
+    for (int i = 0; i < N * COUNT; ++i)                        \
+      {                                                        \
+       a[i * 2] = b[i * 2] + COUNT;                    \
+       a[i * 2 + 1] = COUNT;                           \
+      }                                                        \
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-3.c b/gcc/testsuite/gcc.dg/vect/pr87288-3.c

new file mode 100644 (file)

index 0000000..23f574c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-3.c
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)                               \
+  void __attribute__ ((noipa))                         \
+  run_##COUNT (int *restrict a, int *restrict b)       \
+  {                                                    \
+    for (int i = 0; i < N * COUNT + 1; ++i)            \
+      {                                                        \
+       a[i * 2] = b[i * 2] + COUNT;                    \
+       a[i * 2 + 1] = COUNT;                           \
+      }                                                        \
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N + 1; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N + 2] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT + 1; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2 + 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4 + 2] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6 + 2] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8 + 2] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

index 70e9157646fd068abedba232f650c3b858b8c4ad..fdac10bab5acd8d8e914ea134bf5f73bd2efc86a 100644 (file)
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2074,14 +2074,22 @@ start_over:
      /* The main loop handles all iterations.  */
      LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
    else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
      {
-      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
-                      - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
+      /* Work out the (constant) number of iterations that need to be
+        peeled for reasons other than niters.  */
+      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+       peel_niter += 1;
+      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
                        LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
      }
    else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+          /* ??? When peeling for gaps but not alignment, we could
+             try to check whether the (variable) niters is known to be
+             VF * N + 1.  That's something of a niche case though.  */
+          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
            || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
            || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
                 < (unsigned) exact_log2 (const_vf))
author	rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)
committer	rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 20 Sep 2018 12:58:23 +0000 (12:58 +0000)
gcc/ChangeLog		patch | blob | blame | history
gcc/testsuite/ChangeLog		patch | blob | blame | history
gcc/testsuite/gcc.dg/vect/pr87288-1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/pr87288-2.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/pr87288-3.c	[new file with mode: 0644]	patch | blob
gcc/tree-vect-loop.c		patch | blob | blame | history