/* Data references and dependences detectors.
- Copyright (C) 2003-2018 Free Software Foundation, Inc.
+ Copyright (C) 2003-2019 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
#include "tree-affine.h"
#include "params.h"
#include "builtins.h"
+#include "tree-eh.h"
+#include "ssa.h"
static struct datadep_stats
{
int i;
for (i = 0; i < n; i++)
- fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "%3d ", (int)vector[i]);
fprintf (outfile, "\n");
}
dump_subscript (outf, sub);
}
- fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
fprintf (outf, " loop nest: (");
FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
fprintf (outf, "%d ", loopi->num);
dump_ddrs (stderr, ddrs);
}
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache);
+
/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
(the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
constant of type ssizetype, and returns true. If we cannot do this
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
- tree *var, tree *off)
+ tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache)
{
tree var0, var1;
tree off0, off1;
/* FALLTHROUGH */
case PLUS_EXPR:
case MINUS_EXPR:
- split_constant_offset (op0, &var0, &off0);
- split_constant_offset (op1, &var1, &off1);
+ split_constant_offset (op0, &var0, &off0, cache);
+ split_constant_offset (op1, &var1, &off1, cache);
*var = fold_build2 (code, type, var0, var1);
*off = size_binop (ocode, off0, off1);
return true;
if (TREE_CODE (op1) != INTEGER_CST)
return false;
- split_constant_offset (op0, &var0, &off0);
+ split_constant_offset (op0, &var0, &off0, cache);
*var = fold_build2 (MULT_EXPR, type, var0, op1);
*off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
return true;
if (poffset)
{
- split_constant_offset (poffset, &poffset, &off1);
+ split_constant_offset (poffset, &poffset, &off1, cache);
off0 = size_binop (PLUS_EXPR, off0, off1);
if (POINTER_TYPE_P (TREE_TYPE (base)))
base = fold_build_pointer_plus (base, poffset);
if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
return false;
- var0 = gimple_assign_rhs1 (def_stmt);
subcode = gimple_assign_rhs_code (def_stmt);
+
+ /* We are using a cache to avoid un-CSEing large amounts of code. */
+ bool use_cache = false;
+ if (!has_single_use (op0)
+ && (subcode == POINTER_PLUS_EXPR
+ || subcode == PLUS_EXPR
+ || subcode == MINUS_EXPR
+ || subcode == MULT_EXPR
+ || subcode == ADDR_EXPR
+ || CONVERT_EXPR_CODE_P (subcode)))
+ {
+ use_cache = true;
+ bool existed;
+ std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
+ if (existed)
+ {
+ if (integer_zerop (e.second))
+ return false;
+ *var = e.first;
+ *off = e.second;
+ return true;
+ }
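+	  /* The zero offset makes this initial entry a "failed" marker:
+	     if the recursion below cannot split OP0, later queries for
+	     OP0 hit the integer_zerop test above and fail immediately.  */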
+ e = std::make_pair (op0, ssize_int (0));
+ }
+
+ var0 = gimple_assign_rhs1 (def_stmt);
var1 = gimple_assign_rhs2 (def_stmt);
- return split_constant_offset_1 (type, var0, subcode, var1, var, off);
+ bool res = split_constant_offset_1 (type, var0, subcode, var1,
+ var, off, cache);
+ if (res && use_cache)
+ *cache.get (op0) = std::make_pair (*var, *off);
+ return res;
}
CASE_CONVERT:
{
- /* We must not introduce undefined overflow, and we must not change the value.
- Hence we're okay if the inner type doesn't overflow to start with
- (pointer or signed), the outer type also is an integer or pointer
- and the outer precision is at least as large as the inner. */
+ /* We must not introduce undefined overflow, and we must not change
+ the value. Hence we're okay if the inner type doesn't overflow
+ to start with (pointer or signed), the outer type also is an
+ integer or pointer and the outer precision is at least as large
+ as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off, cache);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ value_range_kind vr_type = get_range_info (tmp_var, &var_min,
+ &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ signop sgn = TYPE_SIGN (itype);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max, var_nonzero,
+ sgn) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ wi::overflow_type overflow[2];
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off, cache);
*var = fold_convert (type, var0);
return true;
}
/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
will be ssizetype. */
-void
-split_constant_offset (tree exp, tree *var, tree *off)
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache)
{
tree type = TREE_TYPE (exp), op0, op1, e, o;
enum tree_code code;
code = TREE_CODE (exp);
extract_ops_from_tree (exp, &code, &op0, &op1);
- if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
+ if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache))
{
*var = e;
*off = o;
}
}
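+
+/* Expresses EXP as VAR + OFF, where OFF is a constant of type ssizetype.
+   This public wrapper keeps one persistent cache and empties it after
+   each top-level call, so cached splits are only shared within a single
+   decomposition.  For example (illustrative): for EXP == (sizetype) i_1 + 16
+   it sets *VAR to (sizetype) i_1 and *OFF to 16.  */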
+void
+split_constant_offset (tree exp, tree *var, tree *off)
+{
+ static hash_map<tree, std::pair<tree, tree> > *cache;
+ if (!cache)
+ cache = new hash_map<tree, std::pair<tree, tree> > (37);
+ split_constant_offset (exp, var, off, *cache);
+ cache->empty ();
+}
+
/* Returns the address ADDR of an object in a canonical shape (without nop
casts, and with type of pointer to the object). */
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyze the behavior of memory reference REF. There are two modes:
+/* Analyze the behavior of memory reference REF within STMT.
+ There are two modes:
- BB analysis. In this case we simply split the address into base,
init and offset components, without reference to any containing loop.
Return an opt_result indicating success or failure, storing the results in DRB on success.
BB analysis can only fail for bitfield or reversed-storage accesses. */
-bool
+opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
- struct loop *loop)
+ struct loop *loop, const gimple *stmt)
{
poly_int64 pbitsize, pbitpos;
tree base, poffset;
poly_int64 pbytepos;
if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: bit offset alignment.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: bit offset alignment.\n");
if (preversep)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: reverse storage order.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: reverse storage order.\n");
/* Calculate the alignment and misalignment for the inner reference. */
unsigned int HOST_WIDE_INT bit_base_misalignment;
if (in_loop)
{
if (!simple_iv (loop, loop, base, &base_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of base is not affine.\n");
}
else
{
offset_iv.step = ssize_int (0);
}
else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of offset is not affine.\n");
}
init = ssize_int (pbytepos);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "success.\n");
- return true;
+ return opt_result::success ();
}
/* Return true if OP is a valid component reference for a DR access
DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
dr_analyze_innermost (&DR_INNERMOST (dr), memref,
- nest != NULL ? loop : NULL);
+ nest != NULL ? loop : NULL, stmt);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
return dr;
}
-/* A helper function computes order between two tree epxressions T1 and T2.
+/* A helper function computes order between two tree expressions T1 and T2.
This is used in comparator functions sorting objects based on the order
of tree expressions. The function returns -1, 0, or 1. */
/* Return TRUE if it's possible to resolve data dependence DDR by runtime alias
check. */
-bool
+opt_result
runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "consider run-time aliasing test between %T and %T\n",
+ DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
if (!speed_p)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported when optimizing "
- "for size.\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported when"
+ " optimizing for size.\n");
/* FORNOW: We don't support versioning with outer-loop in either
vectorization or loop distribution. */
if (loop != NULL && loop->inner != NULL)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for outer loop.\n");
- return false;
- }
-
- /* FORNOW: We don't support creating runtime alias tests for non-constant
- step. */
- if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
- || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for non-constant "
- "step\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported for"
+ " outer loop.\n");
- return true;
+ return opt_result::success ();
}
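+
+/* Illustrative use (not part of this patch): callers can propagate the
+   failure, including its location and message, e.g.
+
+     opt_result res = runtime_alias_check_p (ddr, loop, speed_p);
+     if (!res)
+       return res;  */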
/* Operator == between two dr_with_seg_len objects.
operator == (const dr_with_seg_len& d1,
const dr_with_seg_len& d2)
{
- return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
- DR_BASE_ADDRESS (d2.dr), 0)
- && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
- && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
- && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
+ return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
+ DR_BASE_ADDRESS (d2.dr), 0)
+ && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
+ && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
+ && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
+ && known_eq (d1.access_size, d2.access_size)
+ && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- poly_uint64 factor)
+ poly_uint64)
{
/* Sort the collected data ref pairs so that we can scan them once to
combine all possible aliasing checks. */
if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "found equal ranges ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
alias_pairs->ordered_remove (i--);
continue;
}
}
poly_int64 init_a1, init_a2;
+ /* Only consider cases in which the distance between the initial
+ DR_A1 and the initial DR_A2 is known at compile time. */
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
DR_BASE_ADDRESS (dr_a2->dr), 0)
|| !operand_equal_p (DR_OFFSET (dr_a1->dr),
std::swap (init_a1, init_a2);
}
- /* Only merge const step data references. */
- poly_int64 step_a1, step_a2;
- if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1)
- || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2))
- continue;
+ /* Work out what the segment length would be if we did combine
+ DR_A1 and DR_A2:
- bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0);
+ - If DR_A1 and DR_A2 have equal lengths, that length is
+ also the combined length.
- /* DR_A1 and DR_A2 must go in the same direction. */
- if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0)))
- continue;
+ - If DR_A1 and DR_A2 both have negative "lengths", the combined
+ length is the lower bound on those lengths.
- poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0;
- bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len,
- &seg_len_a1);
- bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len,
- &seg_len_a2);
-
- /* We need to compute merged segment length at compilation time for
- dr_a1 and dr_a2, which is impossible if either one has non-const
- segment length. */
- if ((!const_seg_len_a1 || !const_seg_len_a2)
- && maybe_ne (step_a1, step_a2))
- continue;
+ - If DR_A1 and DR_A2 both have positive lengths, the combined
+ length is the upper bound on those lengths.
- bool do_remove = false;
- poly_uint64 diff = init_a2 - init_a1;
- poly_uint64 min_seg_len_b;
- tree new_seg_len;
+ Other cases are unlikely to give a useful combination.
- if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b))
+ The lengths both have sizetype, so the sign is taken from
+ the step instead. */
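+	      /* For example (illustrative): with both direction indicators
+		 known to be nonnegative, segment lengths of 12 and 16
+		 combine to upper_bound (12, 16) == 16.  */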
+ if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
{
- tree step_b = DR_STEP (dr_b1->dr);
- if (!tree_fits_shwi_p (step_b))
+ poly_uint64 seg_len_a1, seg_len_a2;
+ if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
+ || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
continue;
- min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b));
- }
-
- /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
- Case A:
- check if the following condition is satisfied:
-
- DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
-
- where DIFF = DR_A2_INIT - DR_A1_INIT. However,
- SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
- have to make a best estimation. We can get the minimum value
- of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
- then either of the following two conditions can guarantee the
- one above:
-
- 1: DIFF <= MIN_SEG_LEN_B
- 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
- Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
- to take care of wrapping behavior in it.
-
- Case B:
- If the left segment does not extend beyond the start of the
- right segment the new segment length is that of the right
- plus the segment distance. The condition is like:
+ tree indicator_a = dr_direction_indicator (dr_a1->dr);
+ if (TREE_CODE (indicator_a) != INTEGER_CST)
+ continue;
- DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
+ tree indicator_b = dr_direction_indicator (dr_a2->dr);
+ if (TREE_CODE (indicator_b) != INTEGER_CST)
+ continue;
- Note 1: Case A.2 and B combined together effectively merges every
- dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
+ int sign_a = tree_int_cst_sgn (indicator_a);
+ int sign_b = tree_int_cst_sgn (indicator_b);
- Note 2: Above description is based on positive DR_STEP, we need to
- take care of negative DR_STEP for wrapping behavior. See PR80815
- for more information. */
- if (neg_step)
- {
- /* Adjust diff according to access size of both references. */
- diff += tree_to_poly_uint64
- (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr))));
- diff -= tree_to_poly_uint64
- (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr))));
- /* Case A.1. */
- if (known_le (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || const_seg_len_a2)
- {
- if (const_seg_len_a1 || const_seg_len_a2)
- new_seg_len
- = build_int_cstu (sizetype,
- lower_bound (seg_len_a1 - diff,
- seg_len_a2));
- else
- new_seg_len
- = size_binop (MINUS_EXPR, dr_a2->seg_len,
- build_int_cstu (sizetype, diff));
+ poly_uint64 new_seg_len;
+ if (sign_a <= 0 && sign_b <= 0)
+ new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
+ else if (sign_a >= 0 && sign_b >= 0)
+ new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
+ else
+ continue;
- dr_a2->seg_len = new_seg_len;
- do_remove = true;
- }
+ dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
+ new_seg_len);
+ dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
}
- else
- {
- /* Case A.1. */
- if (known_le (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || const_seg_len_a1)
- {
- if (const_seg_len_a1 && const_seg_len_a2)
- new_seg_len
- = build_int_cstu (sizetype,
- upper_bound (seg_len_a2 + diff,
- seg_len_a1));
- else
- new_seg_len
- = size_binop (PLUS_EXPR, dr_a2->seg_len,
- build_int_cstu (sizetype, diff));
- dr_a1->seg_len = new_seg_len;
- do_remove = true;
- }
- }
+ /* This is always positive due to the swap above. */
+ poly_uint64 diff = init_a2 - init_a1;
- if (do_remove)
+ /* The new check will start at DR_A1. Make sure that its access
+ size encompasses the initial DR_A2. */
+ if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
{
- if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (neg_step ? i - 1 : i);
- i--;
+ dr_a1->access_size = upper_bound (dr_a1->access_size,
+ diff + dr_a2->access_size);
+ unsigned int new_align = known_alignment (dr_a1->access_size);
+ dr_a1->align = MIN (dr_a1->align, new_align);
}
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
+ alias_pairs->ordered_remove (i);
+ i--;
}
}
}
|| DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
return false;
- if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
+ poly_uint64 seg_len1, seg_len2;
+ if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
+ || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
return false;
if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
- unsigned HOST_WIDE_INT abs_step
- = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
+ unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
+ if (neg_step)
+ {
+ abs_step = -abs_step;
+ seg_len1 = -seg_len1;
+ seg_len2 = -seg_len2;
+ }
+ else
+ {
+ /* Include the access size in the length, so that we only have one
+ tree addition below. */
+ seg_len1 += dr_a.access_size;
+ seg_len2 += dr_b.access_size;
+ }
- unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
- unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, an alias is checked if the memory segment
     accessed by DR_A in some iterations intersects with the memory segment
     accessed by DR_B in the same number of iterations.
     Note that the segment length is a linear function of the number of
     iterations, with DR_STEP as the coefficient.  */
- unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
- unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
+ poly_uint64 niter_len1, niter_len2;
+ if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
+ || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
+ return false;
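+  /* For example (illustrative): seg_len1 == 12 with abs_step == 4 gives
+     niter_len1 == (12 + 4 - 1) / 4 == 3 iterations.  */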
+
+ poly_uint64 niter_access1 = 0, niter_access2 = 0;
+ if (neg_step)
+ {
+ /* Divide each access size by the byte step, rounding up. */
+ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+ abs_step, &niter_access1)
+ || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+ abs_step, &niter_access2))
+ return false;
+ }
unsigned int i;
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
/* Adjust ranges for negative step. */
if (neg_step)
{
- min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
- CHREC_LEFT (access1), idx_step);
- min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
- CHREC_LEFT (access2), idx_step);
+ /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
+ std::swap (min1, max1);
+ std::swap (min2, max2);
+
+ /* As with the lengths just calculated, we've measured the access
+ sizes in iterations, so multiply them by the index step. */
+ tree idx_access1
+ = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
+ build_int_cst (TREE_TYPE (min1), niter_access1));
+ tree idx_access2
+ = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
+ build_int_cst (TREE_TYPE (min2), niter_access2));
+
+ /* MINUS_EXPR because the above values are negative. */
+ max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
+ max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
}
tree part_cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
return true;
}
+/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
+   every address ADDR accessed by D:
+
+     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
+
+   In this case, every element accessed by D is aligned to at least
+   ALIGN bytes.
+
+   If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT
+   so that:
+
+     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
+
+static void
+get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
+ tree *seg_max_out, HOST_WIDE_INT align)
+{
+ /* Each access has the following pattern:
+
+     <- |seg_len| ->
+     <--- A: -ve step --->
+     +-----+-------+-----+-------+-----+
+     | n-1 | ..... |  0  | ..... | n-1 |
+     +-----+-------+-----+-------+-----+
+                   <--- B: +ve step --->
+                   <- |seg_len| ->
+                   |
+                   base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+ (This should be VF for a particular pair if we know that both steps
+ are the same, otherwise it will be the full number of scalar loop
+ iterations.)
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ If the access size is "access_size" bytes, the lowest addressed byte is:
+
+ base + (step < 0 ? seg_len : 0) [LB]
+
+ and the highest addressed byte is always below:
+
+ base + (step < 0 ? 0 : seg_len) + access_size [UB]
+
+ Thus:
+
+ LB <= ADDR < UB
+
+ If ALIGN is nonzero, all three values are aligned to at least ALIGN
+ bytes, so:
+
+ LB <= ADDR <= UB - ALIGN
+
+ where "- ALIGN" folds naturally with the "+ access_size" and often
+ cancels it out.
+
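+   For example (illustrative): with a positive step, seg_len == 16,
+   access_size == 4 and ALIGN == 4, UB is base + 16 + 4 and the
+   inclusive maximum UB - ALIGN is base + 16, i.e. simply
+   base + seg_len.
+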
+ We don't try to simplify LB and UB beyond this (e.g. by using
+ MIN and MAX based on whether seg_len rather than the stride is
+ negative) because it is possible for the absolute size of the
+ segment to overflow the range of a ssize_t.
+
+ Keeping the pointer_plus outside of the cond_expr should allow
+ the cond_exprs to be shared with other alias checks. */
+ tree indicator = dr_direction_indicator (d.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
+ DR_OFFSET (d.dr));
+ addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
+ tree seg_len
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
+
+ tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len, size_zero_node);
+ tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ size_zero_node, seg_len);
+ max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
+ size_int (d.access_size - align));
+
+ *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
+ *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
+}
+
/* Given two data references and segment lengths described by DR_A and DR_B,
create expression checking if the two addresses ranges intersect with
each other:
if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
return;
- tree segment_length_a = dr_a.seg_len;
- tree segment_length_b = dr_b.seg_len;
- tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
- tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
- tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
-
- offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
- offset_a, DR_INIT (dr_a.dr));
- offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
- offset_b, DR_INIT (dr_b.dr));
- addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
- addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
-
- tree seg_a_min = addr_base_a;
- tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
- /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
- bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
- [a, a+12) */
- if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+ unsigned HOST_WIDE_INT min_align;
+ tree_code cmp_code;
+ if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
+ && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
- seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
- seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+ /* In this case adding access_size to seg_len is likely to give
+ a simple X * step, where X is either the number of scalar
+ iterations or the vectorization factor. We're better off
+ keeping that, rather than subtracting an alignment from it.
+
+ In this case the maximum values are exclusive and so there is
+ no alias if the maximum of one segment equals the minimum
+ of another. */
+ min_align = 0;
+ cmp_code = LE_EXPR;
}
-
- tree seg_b_min = addr_base_b;
- tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
- if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+ else
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
- seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
- seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+ /* Calculate the minimum alignment shared by all four pointers,
+ then arrange for this alignment to be subtracted from the
+ exclusive maximum values to get inclusive maximum values.
+ This "- min_align" is cumulative with a "+ access_size"
+ in the calculation of the maximum values. In the best
+ (and common) case, the two cancel each other out, leaving
+ us with an inclusive bound based only on seg_len. In the
+ worst case we're simply adding a smaller number than before.
+
+ Because the maximum values are inclusive, there is an alias
+ if the maximum value of one segment is equal to the minimum
+ value of the other. */
+ min_align = MIN (dr_a.align, dr_b.align);
+ cmp_code = LT_EXPR;
}
+
+ tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
+ get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
+ get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
+
*cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
- fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+ fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
+ fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
}
/* Create a conditional expression that represents the run-time checks for
{
tree part_cond_expr;
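+  /* The folding done while combining the checks below can generate
+     spurious overflow warnings for compiler-generated expressions,
+     so buffer the warnings and discard them afterwards.  */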
+ fold_defer_overflow_warnings ();
for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
{
const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "create runtime check for data references ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "create runtime check for data references %T and %T\n",
+ DR_REF (dr_a.dr), DR_REF (dr_b.dr));
/* Create condition expression for each pair data references. */
create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
else
*cond_expr = part_cond_expr;
}
+ fold_undefer_and_ignore_overflow_warnings ();
}
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
{
if (TREE_CODE (obj) == ARRAY_REF)
{
- /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
- need to check the stride and the lower bound of the reference. */
- if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
- loop->num)
- || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
- loop->num))
- return false;
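+	  /* Check the index (operand 1) as well as the lower bound and
+	     element size (operands 2 and 3): none of them may be defined
+	     inside LOOP.  */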
+ for (int i = 1; i < 4; ++i)
+ if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
+ loop->num))
+ return false;
}
else if (TREE_CODE (obj) == COMPONENT_REF)
{
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
- bool loop_nest)
+ struct loop *loop_nest)
{
tree addr_a = DR_BASE_OBJECT (a);
tree addr_b = DR_BASE_OBJECT (b);
if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
&& (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
+ /* For cross-iteration dependences the cliques must be valid for the
+ whole loop, not just individual iterations. */
+ && (!loop_nest
+ || MR_DEPENDENCE_CLIQUE (addr_a) == 1
+ || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
&& MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
&& MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
return false;
}
/* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b, loop_nest.exists ()))
+ if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
{
DDR_ARE_DEPENDENT (res) = chrec_known;
return res;
DDR_ARE_DEPENDENT (res) = NULL_TREE;
DDR_SUBSCRIPTS (res).create (full_seq.length);
DDR_LOOP_NEST (res) = loop_nest;
- DDR_INNER_LOOP (res) = 0;
DDR_SELF_REFERENCE (res) = false;
for (i = 0; i < full_seq.length; ++i)
{
if (value0 == false)
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
}
else
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
mat[i][j] = (i == j) ? 1 : 0;
}
-/* Return the first nonzero element of vector VEC1 between START and N.
- We must have START <= N. Returns N if VEC1 is the zero vector. */
+/* Return the index of the first nonzero element of vector VEC1 between
+ START and N. We must have START <= N.
+ Returns N if VEC1 is the zero vector. */
static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
R2 = R2 + CONST1 * R1. */
static void
-lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
+ lambda_int const1)
{
int i;
static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
- int size, int const1)
+ int size, lambda_int const1)
{
int i;
{
while (S[i][j] != 0)
{
- int sigma, factor, a, b;
+ lambda_int sigma, factor, a, b;
a = S[i-1][j];
b = S[i][j];
sigma = (a * b < 0) ? -1: 1;
- a = abs (a);
- b = abs (b);
+ a = abs_hwi (a);
+ b = abs_hwi (b);
factor = sigma * (a / b);
lambda_matrix_row_add (S, n, i, i-1, -factor);
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
if (niter > 0)
{
- HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
- FLOOR_DIV (niter_b - j0, j1));
- HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
-
/* If the overlap occurs outside of the bounds of the
loop, there is no dependence. */
if (x1 >= niter_a || y1 >= niter_b)
*last_conflicts = integer_zero_node;
goto end_analyze_subs_aa;
}
+
+ /* max stmt executions can get quite large, avoid
+ overflows by using wide ints here. */
+ widest_int tau2
+ = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
+ wi::sdiv_floor (wi::sub (niter_b, j0), j1));
+ widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
+ if (wi::min_precision (last_conflict, SIGNED)
+ <= TYPE_PRECISION (integer_type_node))
+ *last_conflicts
+ = build_int_cst (integer_type_node,
+ last_conflict.to_shwi ());
else
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ *last_conflicts = chrec_dont_know;
}
else
*last_conflicts = chrec_dont_know;
}
else if (evolution_function_is_constant_p (difference)
- /* For the moment, the following is verified:
- evolution_function_is_affine_multivariate_p (chrec_a,
- loop_nest->num) */
+ && evolution_function_is_affine_multivariate_p (chrec_a,
+ loop_nest->num)
&& !gcd_of_steps_may_divide_p (chrec_a, difference))
{
/* testsuite/.../ssa-chrec-33.c
dependence_stats.num_miv_independent++;
}
- else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
- && !chrec_contains_symbols (chrec_a)
- && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
- && !chrec_contains_symbols (chrec_b))
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
+ && !chrec_contains_symbols (chrec_a, loop_nest)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
+ && !chrec_contains_symbols (chrec_b, loop_nest))
{
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
{
unsigned i;
lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
+ struct loop *loop = DDR_LOOP_NEST (ddr)[0];
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
{
return false;
}
+      /* When data references are collected in one loop while the data
+	 dependences are analyzed in a loop nest nested inside that loop,
+	 we can end up with more access functions than loops in the nest.
+	 Skip access functions of loops that are not in the loop nest.
+
+	 See PR89725 for more information.  */
+ if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
+ continue;
+
dist = int_cst_value (SUB_DISTANCE (subscript));
index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
*index_carry = MIN (index, *index_carry);
unsigned i;
int index_carry = DDR_NB_LOOPS (ddr);
subscript *sub;
+ struct loop *loop = DDR_LOOP_NEST (ddr)[0];
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
{
if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
{
- if (!evolution_function_is_univariate_p (access_fun))
+ if (!evolution_function_is_univariate_p (access_fun, loop->num))
{
if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
{
return;
}
+      /* When data references are collected in one loop while the data
+	 dependences are analyzed in a loop nest nested inside that loop,
+	 we can end up with more access functions than loops in the nest.
+	 Skip access functions of loops that are not in the loop nest.
+
+	 See PR89725 for more information.  */
+ if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
+ loop))
+ continue;
+
index_carry = MIN (index_carry,
index_in_loop_nest (CHREC_VARIABLE (access_fun),
DDR_LOOP_NEST (ddr)));
{
lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- dist_v[DDR_INNER_LOOP (ddr)] = 1;
+ dist_v[0] = 1;
save_dist_v (ddr, dist_v);
}
reference, returns a failure result, otherwise returns success.  NEST is the outermost
loop of the loop nest in which the references should be analyzed. */
-bool
+opt_result
find_data_references_in_stmt (struct loop *nest, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
auto_vec<data_ref_loc, 2> references;
data_ref_loc *ref;
- bool ret = true;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
- return false;
+ return opt_result::failure_at (stmt, "statement clobbers memory: %G",
+ stmt);
FOR_EACH_VEC_ELT (references, i, ref)
{
datarefs->safe_push (dr);
}
- return ret;
+ return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS. If there is an
return alignment;
}
+/* If BASE is a pointer-typed SSA name, try to find the object that it
+ is based on. Return this object X on success and store the alignment
+ in bytes of BASE - &X in *ALIGNMENT_OUT. */
+
+static tree
+get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
+{
+ if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ gimple *def = SSA_NAME_DEF_STMT (base);
+ base = analyze_scalar_evolution (loop_containing_stmt (def), base);
+
+ /* Peel chrecs and record the minimum alignment preserved by
+ all steps. */
+ unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ while (TREE_CODE (base) == POLYNOMIAL_CHREC)
+ {
+ unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
+ alignment = MIN (alignment, step_alignment);
+ base = CHREC_LEFT (base);
+ }
+
+ /* Punt if the expression is too complicated to handle. */
+ if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ /* The only useful cases are those for which a dereference folds to something
+ other than an INDIRECT_REF. */
+ tree ref_type = TREE_TYPE (TREE_TYPE (base));
+ tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
+ if (!ref)
+ return NULL_TREE;
+
+ /* Analyze the base to which the steps we peeled were applied. */
+ poly_int64 bitsize, bitpos, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset;
+ base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ return NULL_TREE;
+
+ /* Restrict the alignment to that guaranteed by the offsets. */
+ unsigned int bytepos_alignment = known_alignment (bytepos);
+ if (bytepos_alignment != 0)
+ alignment = MIN (alignment, bytepos_alignment);
+ if (offset)
+ {
+ unsigned int offset_alignment = highest_pow2_factor (offset);
+ alignment = MIN (alignment, offset_alignment);
+ }
+
+ *alignment_out = alignment;
+ return base;
+}
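+
+/* For example (illustrative): if BASE is the SSA name for &s.x + i_1 * 8,
+   the loop above peels the {&s.x, +, 8} chrec, *&s.x folds to s.x, and
+   get_inner_reference returns s together with the byte offset of field x,
+   so the returned alignment is limited by both the step (8) and that
+   offset.  */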
+
+/* Return the object whose alignment would need to be changed in order
+ to increase the alignment of ADDR. Store the maximum achievable
+ alignment in *MAX_ALIGNMENT. */
+
+tree
+get_base_for_alignment (tree addr, unsigned int *max_alignment)
+{
+ tree base = get_base_for_alignment_1 (addr, max_alignment);
+ if (base)
+ return base;
+
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ return addr;
+}
+
/* Recursive helper function. */
static bool
free_data_ref (dr);
datarefs.release ();
}
+
+/* Common routine implementing both dr_direction_indicator and
+ dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
+ to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
+ Return the step as the indicator otherwise. */
+
+static tree
+dr_step_indicator (struct data_reference *dr, int useful_min)
+{
+ tree step = DR_STEP (dr);
+ if (!step)
+ return NULL_TREE;
+ STRIP_NOPS (step);
+ /* Look for cases where the step is scaled by a positive constant
+ integer, which will often be the access size. If the multiplication
+ doesn't change the sign (due to overflow effects) then we can
+ test the unscaled value instead. */
+ if (TREE_CODE (step) == MULT_EXPR
+ && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
+ && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
+ {
+ tree factor = TREE_OPERAND (step, 1);
+ step = TREE_OPERAND (step, 0);
+
+ /* Strip widening and truncating conversions as well as nops. */
+ if (CONVERT_EXPR_P (step)
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
+ step = TREE_OPERAND (step, 0);
+ tree type = TREE_TYPE (step);
+
+ /* Get the range of step values that would not cause overflow. */
+ widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
+ / wi::to_widest (factor));
+ widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
+ / wi::to_widest (factor));
+
+ /* Get the range of values that the unconverted step actually has. */
+ wide_int step_min, step_max;
+ if (TREE_CODE (step) != SSA_NAME
+ || get_range_info (step, &step_min, &step_max) != VR_RANGE)
+ {
+ step_min = wi::to_wide (TYPE_MIN_VALUE (type));
+ step_max = wi::to_wide (TYPE_MAX_VALUE (type));
+ }
+
+ /* Check whether the unconverted step has an acceptable range. */
+ signop sgn = TYPE_SIGN (type);
+ if (wi::les_p (minv, widest_int::from (step_min, sgn))
+ && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
+ {
+ if (wi::ge_p (step_min, useful_min, sgn))
+ return ssize_int (useful_min);
+ else if (wi::lt_p (step_max, 0, sgn))
+ return ssize_int (-1);
+ else
+ return fold_convert (ssizetype, step);
+ }
+ }
+ return DR_STEP (dr);
+}
+
+/* Return a value that is negative iff DR has a negative step. */
+
+tree
+dr_direction_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 0);
+}
+
+/* Return a value that is zero iff DR has a zero step. */
+
+tree
+dr_zero_step_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 1);
+}
+
+/* Return true if DR is known to have a nonnegative (but possibly zero)
+ step. */
+
+bool
+dr_known_forward_stride_p (struct data_reference *dr)
+{
+ tree indicator = dr_direction_indicator (dr);
+ tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ return neg_step_val && integer_zerop (neg_step_val);
+}
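+
+/* Illustrative behavior (not part of this patch): for DR_STEP == 4 the
+   indicator is the constant 4, the LT_EXPR folds to false and
+   dr_known_forward_stride_p returns true.  For a symbolic step such as
+   (ssizetype) n_1 * 4 with no known range for n_1, the indicator stays
+   symbolic, the comparison does not fold to a constant and the function
+   conservatively returns false.  */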