bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
- unsigned int i;
- for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
+ int found = -1;
+ for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
{
tree access1 = DR_ACCESS_FN (dr_a.dr, i);
tree access2 = DR_ACCESS_FN (dr_b.dr, i);
return false;
}
- /* The two indices must have the same step. */
- if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ if (found >= 0)
return false;
+ found = i;
+ }
- tree idx_step = CHREC_RIGHT (access1);
- /* Index must have const step, otherwise DR_STEP won't be constant. */
- gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
- /* Index must evaluate in the same direction as DR. */
- gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
+ /* Ought not to happen in practice, since if all accesses are equal then the
+ alias should be decidable at compile time. */
+ if (found < 0)
+ return false;
- tree min1 = CHREC_LEFT (access1);
- tree min2 = CHREC_LEFT (access2);
- if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
- return false;
+ /* The two indices must have the same step. */
+ tree access1 = DR_ACCESS_FN (dr_a.dr, found);
+ tree access2 = DR_ACCESS_FN (dr_b.dr, found);
+ if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ return false;
- /* Ideally, alias can be checked against loop's control IV, but we
- need to prove linear mapping between control IV and reference
- index. Although that should be true, we check against (array)
- index of data reference. Like segment length, index length is
- linear function of the number of iterations with index_step as
- the coefficient, i.e, niter_len * idx_step. */
- offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
- SIGNED);
- if (neg_step)
- abs_idx_step = -abs_idx_step;
- poly_offset_int idx_len1 = abs_idx_step * niter_len1;
- poly_offset_int idx_len2 = abs_idx_step * niter_len2;
- poly_offset_int idx_access1 = abs_idx_step * niter_access1;
- poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+ tree idx_step = CHREC_RIGHT (access1);
+ /* Index must have const step, otherwise DR_STEP won't be constant. */
+ gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
+ /* Index must evaluate in the same direction as DR. */
+ gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
- gcc_assert (known_ge (idx_len1, 0)
- && known_ge (idx_len2, 0)
- && known_ge (idx_access1, 0)
- && known_ge (idx_access2, 0));
+ tree min1 = CHREC_LEFT (access1);
+ tree min2 = CHREC_LEFT (access2);
+ if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
+ return false;
- /* Each access has the following pattern, with lengths measured
- in units of INDEX:
+ /* Ideally, alias can be checked against loop's control IV, but we
+ need to prove linear mapping between control IV and reference
+ index. Although that should be true, we check against (array)
+ index of data reference. Like segment length, index length is
+ linear function of the number of iterations with index_step as
+ the coefficient, i.e, niter_len * idx_step. */
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+ SIGNED);
+ if (neg_step)
+ abs_idx_step = -abs_idx_step;
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
- <-- idx_len -->
- <--- A: -ve step --->
- +-----+-------+-----+-------+-----+
- | n-1 | ..... | 0 | ..... | n-1 |
- +-----+-------+-----+-------+-----+
- <--- B: +ve step --->
- <-- idx_len -->
- |
- min
+ gcc_assert (known_ge (idx_len1, 0)
+ && known_ge (idx_len2, 0)
+ && known_ge (idx_access1, 0)
+ && known_ge (idx_access2, 0));
- where "n" is the number of scalar iterations covered by the segment
- and where each access spans idx_access units.
+ /* Each access has the following pattern, with lengths measured
+ in units of INDEX:
- A is the range of bytes accessed when the step is negative,
- B is the range when the step is positive.
+ <-- idx_len -->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ..... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <-- idx_len -->
+ |
+ min
- When checking for general overlap, we need to test whether
- the range:
+ where "n" is the number of scalar iterations covered by the segment
+ and where each access spans idx_access units.
- [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
- overlaps:
+ When checking for general overlap, we need to test whether
+ the range:
- [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+ [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
- where:
+ overlaps:
+
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
- low_offsetN = +ve step ? 0 : -idx_lenN;
- high_offsetN = +ve step ? idx_lenN : 0;
+ where:
- This is equivalent to testing whether:
+ low_offsetN = +ve step ? 0 : -idx_lenN;
+ high_offsetN = +ve step ? idx_lenN : 0;
+
+ This is equivalent to testing whether:
- min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
- && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
- Converting this into a single test, there is an overlap if:
+ Converting this into a single test, there is an overlap if:
- 0 <= min2 - min1 + bias <= limit
+ 0 <= min2 - min1 + bias <= limit
- where bias = high_offset2 + idx_access2 - 1 - low_offset1
- limit = (high_offset1 - low_offset1 + idx_access1 - 1)
- + (high_offset2 - low_offset2 + idx_access2 - 1)
- i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
- Combining the tests requires limit to be computable in an unsigned
- form of the index type; if it isn't, we fall back to the usual
- pointer-based checks.
+ Combining the tests requires limit to be computable in an unsigned
+ form of the index type; if it isn't, we fall back to the usual
+ pointer-based checks.
- We can do better if DR_B is a write and if DR_A and DR_B are
- well-ordered in both the original and the new code (see the
- comment above the DR_ALIAS_* flags for details). In this case
- we know that for each i in [0, n-1], the write performed by
- access i of DR_B occurs after access numbers j<=i of DR_A in
- both the original and the new code. Any write or anti
- dependencies wrt those DR_A accesses are therefore maintained.
+ We can do better if DR_B is a write and if DR_A and DR_B are
+ well-ordered in both the original and the new code (see the
+ comment above the DR_ALIAS_* flags for details). In this case
+ we know that for each i in [0, n-1], the write performed by
+ access i of DR_B occurs after access numbers j<=i of DR_A in
+ both the original and the new code. Any write or anti
+ dependencies wrt those DR_A accesses are therefore maintained.
- We just need to make sure that each individual write in DR_B does not
- overlap any higher-indexed access in DR_A; such DR_A accesses happen
- after the DR_B access in the original code but happen before it in
- the new code.
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
- We know the steps for both accesses are equal, so by induction, we
- just need to test whether the first write of DR_B overlaps a later
- access of DR_A. In other words, we need to move min1 along by
- one iteration:
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move min1 along by
+ one iteration:
- min1' = min1 + idx_step
+ min1' = min1 + idx_step
- and use the ranges:
+ and use the ranges:
- [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
- and:
+ and:
- [min2, min2 + idx_access2 - 1]
+ [min2, min2 + idx_access2 - 1]
- where:
+ where:
- low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
- high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
- if (waw_or_war_p)
- idx_len1 -= abs_idx_step;
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+ high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
+ if (waw_or_war_p)
+ idx_len1 -= abs_idx_step;
- poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
- if (!waw_or_war_p)
- limit += idx_len2;
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+ if (!waw_or_war_p)
+ limit += idx_len2;
- tree utype = unsigned_type_for (TREE_TYPE (min1));
- if (!wi::fits_to_tree_p (limit, utype))
- return false;
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
+ if (!wi::fits_to_tree_p (limit, utype))
+ return false;
- poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
- poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
- poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
- /* Equivalent to adding IDX_STEP to MIN1. */
- if (waw_or_war_p)
- bias -= wi::to_offset (idx_step);
-
- tree subject = fold_build2 (MINUS_EXPR, utype,
- fold_convert (utype, min2),
- fold_convert (utype, min1));
- subject = fold_build2 (PLUS_EXPR, utype, subject,
- wide_int_to_tree (utype, bias));
- tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
- wide_int_to_tree (utype, limit));
- if (*cond_expr)
- *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- *cond_expr, part_cond_expr);
- else
- *cond_expr = part_cond_expr;
- }
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+ /* Equivalent to adding IDX_STEP to MIN1. */
+ if (waw_or_war_p)
+ bias -= wi::to_offset (idx_step);
+
+ tree subject = fold_build2 (MINUS_EXPR, utype,
+ fold_convert (utype, min2),
+ fold_convert (utype, min1));
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
+ wide_int_to_tree (utype, bias));
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+ wide_int_to_tree (utype, limit));
+ if (*cond_expr)
+ *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ *cond_expr, part_cond_expr);
+ else
+ *cond_expr = part_cond_expr;
if (dump_enabled_p ())
{
if (waw_or_war_p)