bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
- unsigned int i;
- for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
+ int found = -1;
+ for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
{
tree access1 = DR_ACCESS_FN (dr_a.dr, i);
tree access2 = DR_ACCESS_FN (dr_b.dr, i);
return false;
}
- /* The two indices must have the same step. */
- if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ if (found >= 0)
return false;
+ found = i;
+ }
- tree idx_step = CHREC_RIGHT (access1);
- /* Index must have const step, otherwise DR_STEP won't be constant. */
- gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
- /* Index must evaluate in the same direction as DR. */
- gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
+ /* Ought not to happen in practice, since if all accesses are equal then the
+ alias should be decidable at compile time. */
+ if (found < 0)
+ return false;
- tree min1 = CHREC_LEFT (access1);
- tree min2 = CHREC_LEFT (access2);
- if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
- return false;
+ /* The two indices must have the same step. */
+ tree access1 = DR_ACCESS_FN (dr_a.dr, found);
+ tree access2 = DR_ACCESS_FN (dr_b.dr, found);
+ if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ return false;
- /* Ideally, alias can be checked against loop's control IV, but we
- need to prove linear mapping between control IV and reference
- index. Although that should be true, we check against (array)
- index of data reference. Like segment length, index length is
- linear function of the number of iterations with index_step as
- the coefficient, i.e, niter_len * idx_step. */
- offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
- SIGNED);
- if (neg_step)
- abs_idx_step = -abs_idx_step;
- poly_offset_int idx_len1 = abs_idx_step * niter_len1;
- poly_offset_int idx_len2 = abs_idx_step * niter_len2;
- poly_offset_int idx_access1 = abs_idx_step * niter_access1;
- poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+ tree idx_step = CHREC_RIGHT (access1);
+ /* Index must have const step, otherwise DR_STEP won't be constant. */
+ gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
+ /* Index must evaluate in the same direction as DR. */
+ gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
- gcc_assert (known_ge (idx_len1, 0)
- && known_ge (idx_len2, 0)
- && known_ge (idx_access1, 0)
- && known_ge (idx_access2, 0));
+ tree min1 = CHREC_LEFT (access1);
+ tree min2 = CHREC_LEFT (access2);
+ if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
+ return false;
- /* Each access has the following pattern, with lengths measured
- in units of INDEX:
+ /* Ideally, alias can be checked against loop's control IV, but we
+ need to prove linear mapping between control IV and reference
+ index. Although that should be true, we check against (array)
+ index of data reference. Like segment length, index length is
+ linear function of the number of iterations with index_step as
+ the coefficient, i.e, niter_len * idx_step. */
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+ SIGNED);
+ if (neg_step)
+ abs_idx_step = -abs_idx_step;
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
- <-- idx_len -->
- <--- A: -ve step --->
- +-----+-------+-----+-------+-----+
- | n-1 | ..... | 0 | ..... | n-1 |
- +-----+-------+-----+-------+-----+
- <--- B: +ve step --->
- <-- idx_len -->
- |
- min
+ gcc_assert (known_ge (idx_len1, 0)
+ && known_ge (idx_len2, 0)
+ && known_ge (idx_access1, 0)
+ && known_ge (idx_access2, 0));
- where "n" is the number of scalar iterations covered by the segment
- and where each access spans idx_access units.
+ /* Each access has the following pattern, with lengths measured
+ in units of INDEX:
- A is the range of bytes accessed when the step is negative,
- B is the range when the step is positive.
+ <-- idx_len -->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ..... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <-- idx_len -->
+ |
+ min
- When checking for general overlap, we need to test whether
- the range:
+ where "n" is the number of scalar iterations covered by the segment
+ and where each access spans idx_access units.
- [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
- overlaps:
+ When checking for general overlap, we need to test whether
+ the range:
- [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+ [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
- where:
+ overlaps:
+
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
- low_offsetN = +ve step ? 0 : -idx_lenN;
- high_offsetN = +ve step ? idx_lenN : 0;
+ where:
- This is equivalent to testing whether:
+ low_offsetN = +ve step ? 0 : -idx_lenN;
+ high_offsetN = +ve step ? idx_lenN : 0;
+
+ This is equivalent to testing whether:
- min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
- && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
- Converting this into a single test, there is an overlap if:
+ Converting this into a single test, there is an overlap if:
- 0 <= min2 - min1 + bias <= limit
+ 0 <= min2 - min1 + bias <= limit
- where bias = high_offset2 + idx_access2 - 1 - low_offset1
- limit = (high_offset1 - low_offset1 + idx_access1 - 1)
- + (high_offset2 - low_offset2 + idx_access2 - 1)
- i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
- Combining the tests requires limit to be computable in an unsigned
- form of the index type; if it isn't, we fall back to the usual
- pointer-based checks.
+ Combining the tests requires limit to be computable in an unsigned
+ form of the index type; if it isn't, we fall back to the usual
+ pointer-based checks.
- We can do better if DR_B is a write and if DR_A and DR_B are
- well-ordered in both the original and the new code (see the
- comment above the DR_ALIAS_* flags for details). In this case
- we know that for each i in [0, n-1], the write performed by
- access i of DR_B occurs after access numbers j<=i of DR_A in
- both the original and the new code. Any write or anti
- dependencies wrt those DR_A accesses are therefore maintained.
+ We can do better if DR_B is a write and if DR_A and DR_B are
+ well-ordered in both the original and the new code (see the
+ comment above the DR_ALIAS_* flags for details). In this case
+ we know that for each i in [0, n-1], the write performed by
+ access i of DR_B occurs after access numbers j<=i of DR_A in
+ both the original and the new code. Any write or anti
+ dependencies wrt those DR_A accesses are therefore maintained.
- We just need to make sure that each individual write in DR_B does not
- overlap any higher-indexed access in DR_A; such DR_A accesses happen
- after the DR_B access in the original code but happen before it in
- the new code.
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
- We know the steps for both accesses are equal, so by induction, we
- just need to test whether the first write of DR_B overlaps a later
- access of DR_A. In other words, we need to move min1 along by
- one iteration:
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move min1 along by
+ one iteration:
- min1' = min1 + idx_step
+ min1' = min1 + idx_step
- and use the ranges:
+ and use the ranges:
- [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
- and:
+ and:
- [min2, min2 + idx_access2 - 1]
+ [min2, min2 + idx_access2 - 1]
- where:
+ where:
- low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
- high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
- if (waw_or_war_p)
- idx_len1 -= abs_idx_step;
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+ high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
+ if (waw_or_war_p)
+ idx_len1 -= abs_idx_step;
- poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
- if (!waw_or_war_p)
- limit += idx_len2;
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+ if (!waw_or_war_p)
+ limit += idx_len2;
- tree utype = unsigned_type_for (TREE_TYPE (min1));
- if (!wi::fits_to_tree_p (limit, utype))
- return false;
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
+ if (!wi::fits_to_tree_p (limit, utype))
+ return false;
- poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
- poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
- poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
- /* Equivalent to adding IDX_STEP to MIN1. */
- if (waw_or_war_p)
- bias -= wi::to_offset (idx_step);
-
- tree subject = fold_build2 (MINUS_EXPR, utype,
- fold_convert (utype, min2),
- fold_convert (utype, min1));
- subject = fold_build2 (PLUS_EXPR, utype, subject,
- wide_int_to_tree (utype, bias));
- tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
- wide_int_to_tree (utype, limit));
- if (*cond_expr)
- *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- *cond_expr, part_cond_expr);
- else
- *cond_expr = part_cond_expr;
- }
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+ /* Equivalent to adding IDX_STEP to MIN1. */
+ if (waw_or_war_p)
+ bias -= wi::to_offset (idx_step);
+
+ tree subject = fold_build2 (MINUS_EXPR, utype,
+ fold_convert (utype, min2),
+ fold_convert (utype, min1));
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
+ wide_int_to_tree (utype, bias));
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+ wide_int_to_tree (utype, limit));
+ if (*cond_expr)
+ *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ *cond_expr, part_cond_expr);
+ else
+ *cond_expr = part_cond_expr;
if (dump_enabled_p ())
{
if (waw_or_war_p)