git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR tree-optimization/49038 (-ftree-vectorise introduces reads past end of array)
author Ira Rosen <ira.rosen@linaro.org>
Sat, 4 Jun 2011 09:12:55 +0000 (09:12 +0000)
committer Ira Rosen <irar@gcc.gnu.org>
Sat, 4 Jun 2011 09:12:55 +0000 (09:12 +0000)
PR tree-optimization/49038
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
Ensure at least one epilogue iteration if required by data
accesses with gaps.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
to mark loops that require peeling for gaps.
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
(vect_estimate_min_profitable_iters): Take peeling for gaps into
account.
(vect_transform_loop): Generate epilogue if required by data
access with gaps.
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
loop as requiring an epilogue if there are gaps in the end of
the strided group.

From-SVN: r174634

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr49038.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

index 053b6d1c67e13834f801795a5e94c7d460d6caa8..bfb35c908d891be90e6582adba90aa0c51c747e6 100644 (file)
@@ -1,3 +1,20 @@
+2011-06-04  Ira Rosen  <ira.rosen@linaro.org>
+
+       PR tree-optimization/49038
+       * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
+       Ensure at least one epilogue iteration if required by data
+       accesses with gaps.
+       * tree-vectorizer.h (struct _loop_vec_info): Add new field
+       to mark loops that require peeling for gaps.
+       * tree-vect-loop.c (new_loop_vec_info): Initialize new field.
+       (vect_estimate_min_profitable_iters): Take peeling for gaps into
+       account.
+       (vect_transform_loop): Generate epilogue if required by data
+       access with gaps.
+       * tree-vect-data-refs.c (vect_analyze_group_access): Mark the
+       loop as requiring an epilogue if there are gaps in the end of
+       the strided group.
+
 2011-05-29  Richard Sandiford  <rdsandiford@googlemail.com>
 
        PR target/43700
index 9deeb8d5cca7052d55e51f801412a25f7ddc914d..87edb9e491803e6b125416cbec02ae43833e042f 100644 (file)
@@ -1,3 +1,9 @@
+2011-06-04  Ira Rosen  <ira.rosen@linaro.org>
+
+       PR tree-optimization/49038
+       * gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test.
+       * gcc.dg/vect/pr49038.c: New test.
+
 2011-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
        Backport from trunk
diff --git a/gcc/testsuite/gcc.dg/vect/pr49038.c b/gcc/testsuite/gcc.dg/vect/pr49038.c
new file mode 100644 (file)
index 0000000..91c214f
--- /dev/null
@@ -0,0 +1,42 @@
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 320
+#define MMAP_SIZE 0x10000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+void __attribute__((noinline))
+foo (TYPE *__restrict a, TYPE *__restrict b)
+{
+  int n;
+
+  for (n = 0; n < COUNT; n++)
+    a[n] = b[n * 2];
+}
+
+int
+main (void)
+{
+  void *x;
+  size_t b_offset;
+
+  x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (x == MAP_FAILED)
+    {
+      perror ("mmap");
+      return 1;
+    }
+
+  b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
+  foo ((unsigned short *) x,
+       (unsigned short *) ((char *) x + b_offset));
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
new file mode 100644 (file)
index 0000000..45066a0
--- /dev/null
@@ -0,0 +1,116 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 160 
+
+typedef struct {
+   unsigned char a;
+   unsigned char b;
+   unsigned char c;
+   unsigned char d;
+   unsigned char e;
+   unsigned char f;
+   unsigned char g;
+   unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+  int i;
+  s *ptr = arr;
+  s res[N];
+  unsigned char x;
+
+  for (i = 0; i < N; i++)
+    {
+      res[i].a = 0;
+      res[i].b = 0;
+      res[i].c = 0;
+      res[i].d = 0;
+      res[i].e = 0;
+      res[i].f = 0;
+      res[i].g = 0;
+      res[i].h = 0;
+      __asm__ volatile ("");
+    }
+
+  /* Check peeling for gaps for unknown loop bound.  */
+  for (i = 0; i < n; i++)
+    {
+      res[i].c = ptr->b + ptr->c;
+      x = ptr->c + ptr->f;
+      res[i].a = x + ptr->b;
+      res[i].d = ptr->b + ptr->c;
+      res[i].b = ptr->c;
+      res[i].f = ptr->f + ptr->e;
+      res[i].e = ptr->b + ptr->e; 
+      res[i].h = ptr->c;   
+      res[i].g = ptr->b + ptr->c;
+      ptr++; 
+    } 
+   
+  /* check results:  */
+  for (i = 0; i < n; i++)
+    { 
+      if (res[i].c != arr[i].b + arr[i].c
+          || res[i].a != arr[i].c + arr[i].f + arr[i].b
+          || res[i].d != arr[i].b + arr[i].c
+          || res[i].b != arr[i].c
+          || res[i].f != arr[i].f + arr[i].e
+          || res[i].e != arr[i].b + arr[i].e
+          || res[i].h != arr[i].c
+          || res[i].g != arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  /* Check also that we don't do more iterations than needed.  */
+  for (i = n; i < N; i++)
+    {
+      if (res[i].c == arr[i].b + arr[i].c
+          || res[i].a == arr[i].c + arr[i].f + arr[i].b
+          || res[i].d == arr[i].b + arr[i].c
+          || res[i].b == arr[i].c
+          || res[i].f == arr[i].f + arr[i].e
+          || res[i].e == arr[i].b + arr[i].e
+          || res[i].h == arr[i].c
+          || res[i].g == arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  return 0;
+}
+
+
+int main (void)
+{
+  int i;
+  s arr[N];
+  
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    { 
+      arr[i].a = 5;
+      arr[i].b = 6;
+      arr[i].c = 17;
+      arr[i].d = 3;
+      arr[i].e = 16;
+      arr[i].f = 16;
+      arr[i].g = 3;
+      arr[i].h = 56;
+      if (arr[i].a == 178)
+         abort(); 
+    } 
+
+  main1 (arr, N-2);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+  
index a4d6a5f9bc4eba01951972cb2bcf1211ccbc09c6..2e0ddb04eaee0de341fcad6ad064f08c23c953a5 100644 (file)
@@ -1450,7 +1450,7 @@ vect_analyze_group_access (struct data_reference *dr)
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
-  HOST_WIDE_INT stride;
+  HOST_WIDE_INT stride, last_accessed_element = 1;
   bool slp_impossible = false;
 
   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@@ -1479,6 +1479,16 @@ vect_analyze_group_access (struct data_reference *dr)
              fprintf (vect_dump, " step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
            }
+
+         if (loop_vinfo)
+           {
+             LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "Data access with gaps requires scalar "
+                                   "epilogue loop");
+           }
+
          return true;
        }
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -1531,6 +1541,7 @@ vect_analyze_group_access (struct data_reference *dr)
               next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
               continue;
             }
+
           prev = next;
 
           /* Check that all the accesses have the same STEP.  */
@@ -1561,6 +1572,8 @@ vect_analyze_group_access (struct data_reference *dr)
               gaps += diff - 1;
            }
 
+         last_accessed_element += diff;
+
           /* Store the gap from the previous member of the group. If there is no
              gap in the access, DR_GROUP_GAP is always 1.  */
           DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
@@ -1652,6 +1665,15 @@ vect_analyze_group_access (struct data_reference *dr)
             VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
                            stmt);
         }
+
+      /* There is a gap in the end of the group.  */
+      if (stride - last_accessed_element > 0 && loop_vinfo)
+       {
+         LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "Data access with gaps requires scalar "
+                               "epilogue loop");
+       }
     }
 
   return true;
index f4056b058912091abc85adf92a79d76b1db91e5b..568e640f601b4b8de9599f9295c79f45daf1512c 100644 (file)
@@ -1516,7 +1516,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   edge pe;
   basic_block new_bb;
   gimple_seq stmts;
-  tree ni_name;
+  tree ni_name, ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
@@ -1533,9 +1533,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
 
+  /* If epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+                                      ni_name,
+                                      build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+       {
+         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+          add_referenced_var (var);
+
+          stmts = NULL;
+          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+                                                   true, var);
+          if (cond_expr_stmt_list)
+            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+          else
+            {
+              pe = loop_preheader_edge (loop);
+              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+              gcc_assert (!new_bb);
+            }
+        }
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
   /* Create: ratio = ni >> log2(vf) */
 
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+                           ni_minus_gap_name, log_vf);
   if (!is_gimple_val (ratio_name))
     {
       var = create_tmp_var (TREE_TYPE (ni), "bnd");
index c8ad3a63154b1fe6d905106e63f8274968fe05dc..ad5d5d378218f1e31846a3edef105168627793b6 100644 (file)
@@ -711,6 +711,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
+  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 
   return res;
 }
@@ -2053,6 +2054,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
          peel_iters_prologue = niters < peel_iters_prologue ?
                                        niters : peel_iters_prologue;
          peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+         /* If we need to peel for gaps, but no peeling is required, we have
+            to peel VF iterations.  */
+         if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue)
+           peel_iters_epilogue = vf;
        }
     }
 
@@ -4212,7 +4217,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   do_peeling_for_loop_bound
     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
 
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
index c3ff0584025c3102de343d724f389b1a254c325a..bba771e565fe1ea8f6b90927a0205bde1e72a85f 100644 (file)
@@ -242,6 +242,12 @@ typedef struct _loop_vec_info {
   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
      applied to the loop, i.e., no unrolling is needed, this is 1.  */
   unsigned slp_unrolling_factor;
+
+  /* When we have strided data accesses with gaps, we may introduce invalid
+     memory accesses.  We peel the last iteration of the loop to prevent
+     this.  */
+  bool peeling_for_gaps;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -266,6 +272,7 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_STRIDED_STORES(L)       (L)->strided_stores
 #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
+#define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
 VEC_length (gimple, (L)->may_misalign_stmts) > 0