/* Data references and dependences detectors.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ Copyright (C) 2003-2020 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
-#include "params.h"
#include "builtins.h"
+#include "tree-eh.h"
+#include "ssa.h"
+#include "internal-fn.h"
static struct datadep_stats
{
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
unsigned int, unsigned int,
- struct loop *);
+ class loop *);
/* Returns true iff A divides B. */
static inline bool
int i;
for (i = 0; i < n; i++)
- fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "%3d ", (int)vector[i]);
fprintf (outfile, "\n");
}
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
unsigned int i;
- struct loop *loopi;
+ class loop *loopi;
subscript *sub;
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
dump_subscript (outf, sub);
}
- fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
fprintf (outf, " loop nest: (");
FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
fprintf (outf, "%d ", loopi->num);
dump_ddrs (stderr, ddrs);
}
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit);
+
/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
(the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
constant of type ssizetype, and returns true. If we cannot do this
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
- tree *var, tree *off)
+ tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit)
{
tree var0, var1;
tree off0, off1;
/* FALLTHROUGH */
case PLUS_EXPR:
case MINUS_EXPR:
- split_constant_offset (op0, &var0, &off0);
- split_constant_offset (op1, &var1, &off1);
+ if (TREE_CODE (op1) == INTEGER_CST)
+ {
+ split_constant_offset (op0, &var0, &off0, cache, limit);
+ *var = var0;
+ *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
+ return true;
+ }
+ split_constant_offset (op0, &var0, &off0, cache, limit);
+ split_constant_offset (op1, &var1, &off1, cache, limit);
*var = fold_build2 (code, type, var0, var1);
*off = size_binop (ocode, off0, off1);
return true;
if (TREE_CODE (op1) != INTEGER_CST)
return false;
- split_constant_offset (op0, &var0, &off0);
+ split_constant_offset (op0, &var0, &off0, cache, limit);
*var = fold_build2 (MULT_EXPR, type, var0, op1);
*off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
return true;
case ADDR_EXPR:
{
tree base, poffset;
- HOST_WIDE_INT pbitsize, pbitpos;
+ poly_int64 pbitsize, pbitpos, pbytepos;
machine_mode pmode;
int punsignedp, preversep, pvolatilep;
= get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
&punsignedp, &preversep, &pvolatilep);
- if (pbitpos % BITS_PER_UNIT != 0)
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
return false;
base = build_fold_addr_expr (base);
- off0 = ssize_int (pbitpos / BITS_PER_UNIT);
+ off0 = ssize_int (pbytepos);
if (poffset)
{
- split_constant_offset (poffset, &poffset, &off1);
+ split_constant_offset (poffset, &poffset, &off1, cache, limit);
off0 = size_binop (PLUS_EXPR, off0, off1);
if (POINTER_TYPE_P (TREE_TYPE (base)))
base = fold_build_pointer_plus (base, poffset);
if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
return false;
- var0 = gimple_assign_rhs1 (def_stmt);
subcode = gimple_assign_rhs_code (def_stmt);
+
+ /* We are using a cache to avoid un-CSEing large amounts of code. */
+ bool use_cache = false;
+ if (!has_single_use (op0)
+ && (subcode == POINTER_PLUS_EXPR
+ || subcode == PLUS_EXPR
+ || subcode == MINUS_EXPR
+ || subcode == MULT_EXPR
+ || subcode == ADDR_EXPR
+ || CONVERT_EXPR_CODE_P (subcode)))
+ {
+ use_cache = true;
+ bool existed;
+ std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
+ if (existed)
+ {
+ if (integer_zerop (e.second))
+ return false;
+ *var = e.first;
+ *off = e.second;
+ return true;
+ }
+ e = std::make_pair (op0, ssize_int (0));
+ }
+
+ if (*limit == 0)
+ return false;
+ --*limit;
+
+ var0 = gimple_assign_rhs1 (def_stmt);
var1 = gimple_assign_rhs2 (def_stmt);
- return split_constant_offset_1 (type, var0, subcode, var1, var, off);
+ bool res = split_constant_offset_1 (type, var0, subcode, var1,
+ var, off, cache, limit);
+ if (res && use_cache)
+ *cache.get (op0) = std::make_pair (*var, *off);
+ return res;
}
CASE_CONVERT:
{
- /* We must not introduce undefined overflow, and we must not change the value.
- Hence we're okay if the inner type doesn't overflow to start with
- (pointer or signed), the outer type also is an integer or pointer
- and the outer precision is at least as large as the inner. */
+ /* We must not introduce undefined overflow, and we must not change
+ the value. Hence we're okay if the inner type doesn't overflow
+ to start with (pointer or signed), the outer type also is an
+ integer or pointer and the outer precision is at least as large
+ as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ value_range_kind vr_type = get_range_info (tmp_var, &var_min,
+ &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ signop sgn = TYPE_SIGN (itype);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max, var_nonzero,
+ sgn) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ wi::overflow_type overflow[2];
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off, cache, limit);
*var = fold_convert (type, var0);
return true;
}
/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
will be ssizetype. */
-void
-split_constant_offset (tree exp, tree *var, tree *off)
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit)
{
- tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
+ tree type = TREE_TYPE (exp), op0, op1, e, o;
enum tree_code code;
*var = exp;
*off = ssize_int (0);
- STRIP_NOPS (exp);
if (tree_is_chrec (exp)
|| get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
return;
- otype = TREE_TYPE (exp);
code = TREE_CODE (exp);
extract_ops_from_tree (exp, &code, &op0, &op1);
- if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
+ if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
{
- *var = fold_convert (type, e);
+ *var = e;
*off = o;
}
}
+void
+split_constant_offset (tree exp, tree *var, tree *off)
+{
+ unsigned limit = param_ssa_name_def_chain_limit;
+ static hash_map<tree, std::pair<tree, tree> > *cache;
+ if (!cache)
+ cache = new hash_map<tree, std::pair<tree, tree> > (37);
+ split_constant_offset (exp, var, off, *cache, &limit);
+ cache->empty ();
+}
+
/* Returns the address ADDR of an object in a canonical shape (without nop
casts, and with type of pointer to the object). */
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyze the behavior of memory reference REF. There are two modes:
+/* Analyze the behavior of memory reference REF within STMT.
+ There are two modes:
- BB analysis. In this case we simply split the address into base,
init and offset components, without reference to any containing loop.
Return true if the analysis succeeded and store the results in DRB if so.
BB analysis can only fail for bitfield or reversed-storage accesses. */
-bool
+opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
- struct loop *loop)
+ class loop *loop, const gimple *stmt)
{
- HOST_WIDE_INT pbitsize, pbitpos;
+ poly_int64 pbitsize, pbitpos;
tree base, poffset;
machine_mode pmode;
int punsignedp, preversep, pvolatilep;
&punsignedp, &preversep, &pvolatilep);
gcc_assert (base != NULL_TREE);
- if (pbitpos % BITS_PER_UNIT != 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: bit offset alignment.\n");
- return false;
- }
+ poly_int64 pbytepos;
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
+ return opt_result::failure_at (stmt,
+ "failed: bit offset alignment.\n");
if (preversep)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: reverse storage order.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: reverse storage order.\n");
/* Calculate the alignment and misalignment for the inner reference. */
- unsigned int HOST_WIDE_INT base_misalignment;
- unsigned int base_alignment;
- get_object_alignment_1 (base, &base_alignment, &base_misalignment);
+ unsigned int HOST_WIDE_INT bit_base_misalignment;
+ unsigned int bit_base_alignment;
+ get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
/* There are no bitfield references remaining in BASE, so the values
we got back must be whole bytes. */
- gcc_assert (base_alignment % BITS_PER_UNIT == 0
- && base_misalignment % BITS_PER_UNIT == 0);
- base_alignment /= BITS_PER_UNIT;
- base_misalignment /= BITS_PER_UNIT;
+ gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
+ && bit_base_misalignment % BITS_PER_UNIT == 0);
+ unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
+ poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
if (TREE_CODE (base) == MEM_REF)
{
{
/* Subtract MOFF from the base and add it to POFFSET instead.
Adjust the misalignment to reflect the amount we subtracted. */
- offset_int moff = mem_ref_offset (base);
- base_misalignment -= moff.to_short_addr ();
+ poly_offset_int moff = mem_ref_offset (base);
+ base_misalignment -= moff.force_shwi ();
tree mofft = wide_int_to_tree (sizetype, moff);
if (!poffset)
poffset = mofft;
if (in_loop)
{
if (!simple_iv (loop, loop, base, &base_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of base is not affine.\n");
}
else
{
offset_iv.step = ssize_int (0);
}
else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of offset is not affine.\n");
}
- init = ssize_int (pbitpos / BITS_PER_UNIT);
+ init = ssize_int (pbytepos);
/* Subtract any constant component from the base and add it to INIT instead.
Adjust the misalignment to reflect the amount we subtracted. */
drb->offset = fold_convert (ssizetype, offset_iv.base);
drb->init = init;
drb->step = step;
- drb->base_alignment = base_alignment;
- drb->base_misalignment = base_misalignment & (base_alignment - 1);
+ if (known_misalignment (base_misalignment, base_alignment,
+ &drb->base_misalignment))
+ drb->base_alignment = base_alignment;
+ else
+ {
+ drb->base_alignment = known_alignment (base_misalignment);
+ drb->base_misalignment = 0;
+ }
drb->offset_alignment = highest_pow2_factor (offset_iv.base);
drb->step_alignment = highest_pow2_factor (step);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "success.\n");
- return true;
+ return opt_result::success ();
}
/* Return true if OP is a valid component reference for a DR access
}
/* Determines the base object and the list of indices of memory reference
- DR, analyzed in LOOP and instantiated in loop nest NEST. */
+ DR, analyzed in LOOP and instantiated before NEST. */
static void
-dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
+dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
vec<tree> access_fns = vNULL;
tree ref, op;
tree base, off, access_fn;
- basic_block before_loop;
/* If analyzing a basic-block there are no indices to analyze
and thus no access functions. */
}
ref = DR_REF (dr);
- before_loop = block_before_loop (nest);
/* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
into a two element array with a constant index. The base is
{
op = TREE_OPERAND (ref, 1);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
access_fns.safe_push (access_fn);
}
else if (TREE_CODE (ref) == COMPONENT_REF
{
op = TREE_OPERAND (ref, 0);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
{
tree orig_type;
in which the data reference should be analyzed. */
struct data_reference *
-create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
+create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
bool is_read, bool is_conditional_in_stmt)
{
struct data_reference *dr;
DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
dr_analyze_innermost (&DR_INNERMOST (dr), memref,
- nest != NULL ? loop : NULL);
+ nest != NULL ? loop : NULL, stmt);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
return dr;
}
-/* A helper function computes order between two tree epxressions T1 and T2.
+/* A helper function computes order between two tree expressions T1 and T2.
This is used in comparator functions sorting objects based on the order
of tree expressions. The function returns -1, 0, or 1. */
break;
default:
+ if (POLY_INT_CST_P (t1))
+ return compare_sizes_for_sort (wi::to_poly_widest (t1),
+ wi::to_poly_widest (t2));
+
tclass = TREE_CODE_CLASS (code);
/* For decls, compare their UIDs. */
/* Return TRUE it's possible to resolve data dependence DDR by runtime alias
check. */
-bool
-runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
+opt_result
+runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "consider run-time aliasing test between %T and %T\n",
+ DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
if (!speed_p)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported when optimizing "
- "for size.\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported when"
+ " optimizing for size.\n");
/* FORNOW: We don't support versioning with outer-loop in either
vectorization or loop distribution. */
if (loop != NULL && loop->inner != NULL)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for outer loop.\n");
- return false;
- }
-
- /* FORNOW: We don't support creating runtime alias tests for non-constant
- step. */
- if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
- || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for non-constant "
- "step\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported for"
+ " outer loop.\n");
- return true;
+ return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.
operator == (const dr_with_seg_len& d1,
const dr_with_seg_len& d2)
{
- return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
- DR_BASE_ADDRESS (d2.dr), 0)
- && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
- && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
- && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
+ return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
+ DR_BASE_ADDRESS (d2.dr), 0)
+ && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
+ && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
+ && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
+ && known_eq (d1.access_size, d2.access_size)
+ && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
return 0;
}
+/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
+
+static void
+dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
+{
+ dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
+ DR_REF (alias_pair->first.dr),
+ DR_REF (alias_pair->second.dr));
+
+ dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
+ alias_pair->first.seg_len);
+ if (!operand_equal_p (alias_pair->first.seg_len,
+ alias_pair->second.seg_len, 0))
+ dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
+
+ dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
+ dump_dec (MSG_NOTE, alias_pair->first.access_size);
+ if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
+ {
+ dump_printf (MSG_NOTE, " vs. ");
+ dump_dec (MSG_NOTE, alias_pair->second.access_size);
+ }
+
+ dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
+ alias_pair->first.align);
+ if (alias_pair->first.align != alias_pair->second.align)
+ dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
+
+ dump_printf (MSG_NOTE, "\n%sflags: ", indent);
+ if (alias_pair->flags & DR_ALIAS_RAW)
+ dump_printf (MSG_NOTE, " RAW");
+ if (alias_pair->flags & DR_ALIAS_WAR)
+ dump_printf (MSG_NOTE, " WAR");
+ if (alias_pair->flags & DR_ALIAS_WAW)
+ dump_printf (MSG_NOTE, " WAW");
+ if (alias_pair->flags & DR_ALIAS_ARBITRARY)
+ dump_printf (MSG_NOTE, " ARBITRARY");
+ if (alias_pair->flags & DR_ALIAS_SWAPPED)
+ dump_printf (MSG_NOTE, " SWAPPED");
+ if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
+ dump_printf (MSG_NOTE, " UNSWAPPED");
+ if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
+ dump_printf (MSG_NOTE, " MIXED_STEPS");
+ if (alias_pair->flags == 0)
+ dump_printf (MSG_NOTE, " <none>");
+ dump_printf (MSG_NOTE, "\n");
+}
+
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
FACTOR is number of iterations that each data reference is accessed.
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- unsigned HOST_WIDE_INT factor)
+ poly_uint64)
{
+ if (alias_pairs->is_empty ())
+ return;
+
+ /* Canonicalize each pair so that the base components are ordered wrt
+ data_ref_compare_tree. This allows the loop below to merge more
+ cases. */
+ unsigned int i;
+ dr_with_seg_len_pair_t *alias_pair;
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
+ {
+ data_reference_p dr_a = alias_pair->first.dr;
+ data_reference_p dr_b = alias_pair->second.dr;
+ int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
+ DR_BASE_ADDRESS (dr_b));
+ if (comp_res == 0)
+ comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
+ if (comp_res == 0)
+ comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
+ if (comp_res > 0)
+ {
+ std::swap (alias_pair->first, alias_pair->second);
+ alias_pair->flags |= DR_ALIAS_SWAPPED;
+ }
+ else
+ alias_pair->flags |= DR_ALIAS_UNSWAPPED;
+ }
+
/* Sort the collected data ref pairs so that we can scan them once to
combine all possible aliasing checks. */
alias_pairs->qsort (comp_dr_with_seg_len_pair);
/* Scan the sorted dr pairs and check if we can combine alias checks
of two neighboring dr pairs. */
- for (size_t i = 1; i < alias_pairs->length (); ++i)
+ unsigned int last = 0;
+ for (i = 1; i < alias_pairs->length (); ++i)
{
/* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
- dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
- *dr_b1 = &(*alias_pairs)[i-1].second,
- *dr_a2 = &(*alias_pairs)[i].first,
- *dr_b2 = &(*alias_pairs)[i].second;
+ dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
+ dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
+
+ dr_with_seg_len *dr_a1 = &alias_pair1->first;
+ dr_with_seg_len *dr_b1 = &alias_pair1->second;
+ dr_with_seg_len *dr_a2 = &alias_pair2->first;
+ dr_with_seg_len *dr_b2 = &alias_pair2->second;
/* Remove duplicate data ref pairs. */
if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "found equal ranges ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (i--);
+ dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
+ alias_pair1->flags |= alias_pair2->flags;
continue;
}
+ /* Assume that we won't be able to merge the pairs, then correct
+ if we do. */
+ last += 1;
+ if (last != i)
+ (*alias_pairs)[last] = (*alias_pairs)[i];
+
if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
{
/* We consider the case that DR_B1 and DR_B2 are same memrefs,
std::swap (dr_a2, dr_b2);
}
+ poly_int64 init_a1, init_a2;
+ /* Only consider cases in which the distance between the initial
+ DR_A1 and the initial DR_A2 is known at compile time. */
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
DR_BASE_ADDRESS (dr_a2->dr), 0)
|| !operand_equal_p (DR_OFFSET (dr_a1->dr),
DR_OFFSET (dr_a2->dr), 0)
- || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
- || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
+ || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
+ || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
continue;
- /* Only merge const step data references. */
- if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
- || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
+ /* Don't combine if we can't tell which one comes first. */
+ if (!ordered_p (init_a1, init_a2))
continue;
- /* DR_A1 and DR_A2 must goes in the same direction. */
- if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
- != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
- continue;
+ /* Work out what the segment length would be if we did combine
+ DR_A1 and DR_A2:
- bool neg_step
- = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
+ - If DR_A1 and DR_A2 have equal lengths, that length is
+ also the combined length.
- /* We need to compute merged segment length at compilation time for
- dr_a1 and dr_a2, which is impossible if either one has non-const
- segment length. */
- if ((!tree_fits_uhwi_p (dr_a1->seg_len)
- || !tree_fits_uhwi_p (dr_a2->seg_len))
- && tree_int_cst_compare (DR_STEP (dr_a1->dr),
- DR_STEP (dr_a2->dr)) != 0)
- continue;
+ - If DR_A1 and DR_A2 both have negative "lengths", the combined
+ length is the lower bound on those lengths.
+
+ - If DR_A1 and DR_A2 both have positive lengths, the combined
+ length is the upper bound on those lengths.
+
+ Other cases are unlikely to give a useful combination.
+
+ The lengths both have sizetype, so the sign is taken from
+ the step instead. */
+ poly_uint64 new_seg_len = 0;
+ bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
+ dr_a2->seg_len, 0);
+ if (new_seg_len_p)
+ {
+ poly_uint64 seg_len_a1, seg_len_a2;
+ if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
+ || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
+ continue;
+
+ tree indicator_a = dr_direction_indicator (dr_a1->dr);
+ if (TREE_CODE (indicator_a) != INTEGER_CST)
+ continue;
+
+ tree indicator_b = dr_direction_indicator (dr_a2->dr);
+ if (TREE_CODE (indicator_b) != INTEGER_CST)
+ continue;
+
+ int sign_a = tree_int_cst_sgn (indicator_a);
+ int sign_b = tree_int_cst_sgn (indicator_b);
+
+ if (sign_a <= 0 && sign_b <= 0)
+ new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
+ else if (sign_a >= 0 && sign_b >= 0)
+ new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
+ else
+ continue;
+ }
+ /* At this point we're committed to merging the refs. */
/* Make sure dr_a1 starts left of dr_a2. */
- if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
- std::swap (*dr_a1, *dr_a2);
+ if (maybe_gt (init_a1, init_a2))
+ {
+ std::swap (*dr_a1, *dr_a2);
+ std::swap (init_a1, init_a2);
+ }
- bool do_remove = false;
- wide_int diff = (wi::to_wide (DR_INIT (dr_a2->dr))
- - wi::to_wide (DR_INIT (dr_a1->dr)));
- wide_int min_seg_len_b;
- tree new_seg_len;
+ /* The DR_Bs are equal, so only the DR_As can introduce
+ mixed steps. */
+ if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
+ alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
- if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
- min_seg_len_b = wi::abs (wi::to_wide (dr_b1->seg_len));
- else
- min_seg_len_b
- = factor * wi::abs (wi::to_wide (DR_STEP (dr_b1->dr)));
+ if (new_seg_len_p)
+ {
+ dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
+ new_seg_len);
+ dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
+ }
- /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
+ /* This is always positive due to the swap above. */
+ poly_uint64 diff = init_a2 - init_a1;
- Case A:
- check if the following condition is satisfied:
+ /* The new check will start at DR_A1. Make sure that its access
+ size encompasses the initial DR_A2. */
+ if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
+ {
+ dr_a1->access_size = upper_bound (dr_a1->access_size,
+ diff + dr_a2->access_size);
+ unsigned int new_align = known_alignment (dr_a1->access_size);
+ dr_a1->align = MIN (dr_a1->align, new_align);
+ }
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
+ alias_pair1->flags |= alias_pair2->flags;
+ last -= 1;
+ }
+ }
+ alias_pairs->truncate (last + 1);
- DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
+ /* Try to restore the original dr_with_seg_len order within each
+ dr_with_seg_len_pair_t. If we ended up combining swapped and
+ unswapped pairs into the same check, we have to invalidate any
+ RAW, WAR and WAW information for it. */
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "merged alias checks:\n");
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
+ {
+ unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
+ unsigned int swapped = (alias_pair->flags & swap_mask);
+ if (swapped == DR_ALIAS_SWAPPED)
+ std::swap (alias_pair->first, alias_pair->second);
+ else if (swapped != DR_ALIAS_UNSWAPPED)
+ alias_pair->flags |= DR_ALIAS_ARBITRARY;
+ alias_pair->flags &= ~swap_mask;
+ if (dump_enabled_p ())
+ dump_alias_pair (alias_pair, " ");
+ }
+}
- where DIFF = DR_A2_INIT - DR_A1_INIT. However,
- SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
- have to make a best estimation. We can get the minimum value
- of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
- then either of the following two conditions can guarantee the
- one above:
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
+ to optimize cases in which the references form a simple RAW, WAR or
+ WAR dependence. */
- 1: DIFF <= MIN_SEG_LEN_B
- 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
- Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
- to take care of wrapping behavior in it.
+static bool
+create_ifn_alias_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
- Case B:
- If the left segment does not extend beyond the start of the
- right segment the new segment length is that of the right
- plus the segment distance. The condition is like:
+ /* Check for cases in which:
- DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
+ (a) we have a known RAW, WAR or WAR dependence
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
- Note 1: Case A.2 and B combined together effectively merges every
- dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
+ /* Make sure that both DRs access the same pattern of bytes,
+ with a constant length and and step. */
+ poly_uint64 seg_len;
+ if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
+ || !poly_int_tree_p (dr_a.seg_len, &seg_len)
+ || maybe_ne (dr_a.access_size, dr_b.access_size)
+ || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
+ || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
+ return false;
- Note 2: Above description is based on positive DR_STEP, we need to
- take care of negative DR_STEP for wrapping behavior. See PR80815
- for more information. */
- if (neg_step)
- {
- /* Adjust diff according to access size of both references. */
- tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
- tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
- diff += wi::to_wide (size_a2) - wi::to_wide (size_a1);
- /* Case A.1. */
- if (wi::leu_p (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || (tree_fits_uhwi_p (dr_a2->seg_len)))
- {
- if (tree_fits_uhwi_p (dr_a1->seg_len)
- && tree_fits_uhwi_p (dr_a2->seg_len))
- {
- wide_int min_len
- = wi::umin (wi::to_wide (dr_a1->seg_len) - diff,
- wi::to_wide (dr_a2->seg_len));
- new_seg_len = wide_int_to_tree (sizetype, min_len);
- }
- else
- new_seg_len
- = size_binop (MINUS_EXPR, dr_a2->seg_len,
- wide_int_to_tree (sizetype, diff));
+ unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
+ tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
+ tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
+
+ /* See whether the target suports what we want to do. WAW checks are
+ equivalent to WAR checks here. */
+ internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
+ ? IFN_CHECK_RAW_PTRS
+ : IFN_CHECK_WAR_PTRS);
+ unsigned int align = MIN (dr_a.align, dr_b.align);
+ poly_uint64 full_length = seg_len + bytes;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ {
+ full_length = seg_len + dr_a.access_size;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ return false;
+ }
- dr_a2->seg_len = new_seg_len;
- do_remove = true;
- }
- }
- else
- {
- /* Case A.1. */
- if (wi::leu_p (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || (tree_fits_uhwi_p (dr_a1->seg_len)))
- {
- if (tree_fits_uhwi_p (dr_a1->seg_len)
- && tree_fits_uhwi_p (dr_a2->seg_len))
- {
- wide_int max_len
- = wi::umax (wi::to_wide (dr_a2->seg_len) + diff,
- wi::to_wide (dr_a1->seg_len));
- new_seg_len = wide_int_to_tree (sizetype, max_len);
- }
- else
- new_seg_len
- = size_binop (PLUS_EXPR, dr_a2->seg_len,
- wide_int_to_tree (sizetype, diff));
+ /* Commit to using this form of test. */
+ addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
- dr_a1->seg_len = new_seg_len;
- do_remove = true;
- }
- }
+ addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
- if (do_remove)
- {
- if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (neg_step ? i - 1 : i);
- i--;
- }
- }
+ *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
+ ifn, boolean_type_node,
+ 4, addr_a, addr_b,
+ size_int (full_length),
+ size_int (align));
+
+ if (dump_enabled_p ())
+ {
+ if (ifn == IFN_CHECK_RAW_PTRS)
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
+ else
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
}
+ return true;
}
-/* Given LOOP's two data references and segment lengths described by DR_A
- and DR_B, create expression checking if the two addresses ranges intersect
- with each other based on index of the two addresses. This can only be
- done if DR_A and DR_B referring to the same (array) object and the index
- is the only difference. For example:
+/* Try to generate a runtime condition that is true if ALIAS_PAIR is
+ free of aliases, using a condition based on index values instead
+ of a condition based on addresses. Return true on success,
+ storing the condition in *COND_EXPR.
+
+ This can only be done if the two data references in ALIAS_PAIR access
+ the same array object and the index is the only difference. For example,
+ if the two data references are DR_A and DR_B:
DR_A DR_B
data-ref arr[i] arr[j]
We can create expression based on index rather than address:
- (i_0 + 4 < j_0 || j_0 + 4 < i_0)
+ (unsigned) (i_0 - j_0 + 3) <= 6
+
+ i.e. the indices are less than 4 apart.
Note evolution step of index needs to be considered in comparison. */
static bool
-create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
- const dr_with_seg_len& dr_a,
- const dr_with_seg_len& dr_b)
+create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
{
- if (integer_zerop (DR_STEP (dr_a.dr))
+ const dr_with_seg_len &dr_a = alias_pair.first;
+ const dr_with_seg_len &dr_b = alias_pair.second;
+ if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
+ || integer_zerop (DR_STEP (dr_a.dr))
|| integer_zerop (DR_STEP (dr_b.dr))
|| DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
return false;
- if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
+ poly_uint64 seg_len1, seg_len2;
+ if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
+ || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
return false;
if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
- unsigned HOST_WIDE_INT abs_step
- = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
+ unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
+ if (neg_step)
+ {
+ abs_step = -abs_step;
+ seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
+ seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
+ }
- unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
- unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
/* Infer the number of iterations with which the memory segment is accessed
by DR. In other words, alias is checked if memory segment accessed by
DR_A in some iterations intersect with memory segment accessed by DR_B
in the same amount iterations.
Note segnment length is a linear function of number of iterations with
DR_STEP as the coefficient. */
- unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
- unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
+ poly_uint64 niter_len1, niter_len2;
+ if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
+ || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
+ return false;
+
+ /* Divide each access size by the byte step, rounding up. */
+ poly_uint64 niter_access1, niter_access2;
+ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+ abs_step, &niter_access1)
+ || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+ abs_step, &niter_access2))
+ return false;
+
+ bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
unsigned int i;
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
index of data reference. Like segment length, index length is
linear function of the number of iterations with index_step as
the coefficient, i.e, niter_len * idx_step. */
- tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
- build_int_cst (TREE_TYPE (min1),
- niter_len1));
- tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
- build_int_cst (TREE_TYPE (min2),
- niter_len2));
- tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
- tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
- /* Adjust ranges for negative step. */
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+ SIGNED);
if (neg_step)
- {
- min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
- CHREC_LEFT (access1), idx_step);
- min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
- CHREC_LEFT (access2), idx_step);
- }
- tree part_cond_expr
- = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
- fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
+ abs_idx_step = -abs_idx_step;
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+
+ gcc_assert (known_ge (idx_len1, 0)
+ && known_ge (idx_len2, 0)
+ && known_ge (idx_access1, 0)
+ && known_ge (idx_access2, 0));
+
+ /* Each access has the following pattern, with lengths measured
+ in units of INDEX:
+
+ <-- idx_len -->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ..... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <-- idx_len -->
+ |
+ min
+
+ where "n" is the number of scalar iterations covered by the segment
+ and where each access spans idx_access units.
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ When checking for general overlap, we need to test whether
+ the range:
+
+ [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
+
+ overlaps:
+
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+
+ where:
+
+ low_offsetN = +ve step ? 0 : -idx_lenN;
+ high_offsetN = +ve step ? idx_lenN : 0;
+
+ This is equivalent to testing whether:
+
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+
+ Converting this into a single test, there is an overlap if:
+
+ 0 <= min2 - min1 + bias <= limit
+
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+
+ Combining the tests requires limit to be computable in an unsigned
+ form of the index type; if it isn't, we fall back to the usual
+ pointer-based checks.
+
+ We can do better if DR_B is a write and if DR_A and DR_B are
+ well-ordered in both the original and the new code (see the
+ comment above the DR_ALIAS_* flags for details). In this case
+ we know that for each i in [0, n-1], the write performed by
+ access i of DR_B occurs after access numbers j<=i of DR_A in
+ both the original and the new code. Any write or anti
+ dependencies wrt those DR_A accesses are therefore maintained.
+
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
+
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move min1 along by
+ one iteration:
+
+ min1' = min1 + idx_step
+
+ and use the ranges:
+
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+
+ and:
+
+ [min2, min2 + idx_access2 - 1]
+
+ where:
+
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+ high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
+ if (waw_or_war_p)
+ idx_len1 -= abs_idx_step;
+
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+ if (!waw_or_war_p)
+ limit += idx_len2;
+
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
+ if (!wi::fits_to_tree_p (limit, utype))
+ return false;
+
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+ /* Equivalent to adding IDX_STEP to MIN1. */
+ if (waw_or_war_p)
+ bias -= wi::to_offset (idx_step);
+
+ tree subject = fold_build2 (MINUS_EXPR, utype,
+ fold_convert (utype, min2),
+ fold_convert (utype, min1));
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
+ wide_int_to_tree (utype, bias));
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+ wide_int_to_tree (utype, limit));
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
*cond_expr, part_cond_expr);
else
*cond_expr = part_cond_expr;
}
+ if (dump_enabled_p ())
+ {
+ if (waw_or_war_p)
+ dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
+ else
+ dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+ }
+ return true;
+}
+
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to optimize cases in which the second access
+ is a write and in which some overlap is valid. */
+
+static bool
+create_waw_or_war_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
+
+ /* Check for cases in which:
+
+ (a) DR_B is always a write;
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
+
+ /* Check for equal (but possibly variable) steps. */
+ tree step = DR_STEP (dr_a.dr);
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+ return false;
+
+ /* Make sure that we can operate on sizetype without loss of precision. */
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+ return false;
+
+ /* All addresses involved are known to have a common alignment ALIGN.
+ We can therefore subtract ALIGN from an exclusive endpoint to get
+ an inclusive endpoint. In the best (and common) case, ALIGN is the
+ same as the access sizes of both DRs, and so subtracting ALIGN
+ cancels out the addition of an access size. */
+ unsigned int align = MIN (dr_a.align, dr_b.align);
+ poly_uint64 last_chunk_a = dr_a.access_size - align;
+ poly_uint64 last_chunk_b = dr_b.access_size - align;
+
+ /* Get a boolean expression that is true when the step is negative. */
+ tree indicator = dr_direction_indicator (dr_a.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+
+ /* Get lengths in sizetype. */
+ tree seg_len_a
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
+ step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
+
+ /* Each access has the following pattern:
+
+ <- |seg_len| ->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ..... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <- |seg_len| ->
+ |
+ base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ We know that DR_B is a write. We also know (from checking that
+ DR_A and DR_B are well-ordered) that for each i in [0, n-1],
+ the write performed by access i of DR_B occurs after access numbers
+ j<=i of DR_A in both the original and the new code. Any write or
+ anti dependencies wrt those DR_A accesses are therefore maintained.
+
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
+
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move addr_a along by
+ one iteration:
+
+ addr_a' = addr_a + step
+
+ and check whether:
+
+ [addr_b, addr_b + last_chunk_b]
+
+ overlaps:
+
+ [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
+
+ where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
+
+ low_offset_a = +ve step ? 0 : seg_len_a - step
+ high_offset_a = +ve step ? seg_len_a - step : 0
+
+ This is equivalent to testing whether:
+
+ addr_a' + low_offset_a <= addr_b + last_chunk_b
+ && addr_b <= addr_a' + high_offset_a + last_chunk_a
+
+ Converting this into a single test, there is an overlap if:
+
+ 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
+
+ where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
+
+ If DR_A is performed, limit + |step| - last_chunk_b is known to be
+ less than the size of the object underlying DR_A. We also know
+ that last_chunk_b <= |step|; this is checked elsewhere if it isn't
+ guaranteed at compile time. There can therefore be no overflow if
+ "limit" is calculated in an unsigned type with pointer precision. */
+ tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
+ DR_OFFSET (dr_a.dr));
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+ tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
+ DR_OFFSET (dr_b.dr));
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+ /* Advance ADDR_A by one iteration and adjust the length to compensate. */
+ addr_a = fold_build_pointer_plus (addr_a, step);
+ tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
+ seg_len_a, step);
+ if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
+ seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
+
+ tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len_a_minus_step, size_zero_node);
+ if (!CONSTANT_CLASS_P (low_offset_a))
+ low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
+
+ /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
+ but it's usually more efficient to reuse the LOW_OFFSET_A result. */
+ tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
+ low_offset_a);
+
+ /* The amount added to addr_b - addr_a'. */
+ tree bias = fold_build2 (MINUS_EXPR, sizetype,
+ size_int (last_chunk_b), low_offset_a);
+
+ tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
+ limit = fold_build2 (PLUS_EXPR, sizetype, limit,
+ size_int (last_chunk_a + last_chunk_b));
+
+ tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
+ subject = fold_build2 (PLUS_EXPR, sizetype,
+ fold_convert (sizetype, subject), bias);
+
+ *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
return true;
}
-/* Given two data references and segment lengths described by DR_A and DR_B,
- create expression checking if the two addresses ranges intersect with
- each other:
+/* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
+ every address ADDR accessed by D:
+
+ *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
+
+ In this case, every element accessed by D is aligned to at least
+ ALIGN bytes.
- ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
- || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
+ If ALIGN is zero then instead set *SEG_MAX_OUT so that:
+
+ *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
static void
-create_intersect_range_checks (struct loop *loop, tree *cond_expr,
- const dr_with_seg_len& dr_a,
- const dr_with_seg_len& dr_b)
+get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
+ tree *seg_max_out, HOST_WIDE_INT align)
{
+ /* Each access has the following pattern:
+
+ <- |seg_len| ->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ,.... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <- |seg_len| ->
+ |
+ base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+ (This should be VF for a particular pair if we know that both steps
+ are the same, otherwise it will be the full number of scalar loop
+ iterations.)
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ If the access size is "access_size" bytes, the lowest addressed byte is:
+
+ base + (step < 0 ? seg_len : 0) [LB]
+
+ and the highest addressed byte is always below:
+
+ base + (step < 0 ? 0 : seg_len) + access_size [UB]
+
+ Thus:
+
+ LB <= ADDR < UB
+
+ If ALIGN is nonzero, all three values are aligned to at least ALIGN
+ bytes, so:
+
+ LB <= ADDR <= UB - ALIGN
+
+ where "- ALIGN" folds naturally with the "+ access_size" and often
+ cancels it out.
+
+ We don't try to simplify LB and UB beyond this (e.g. by using
+ MIN and MAX based on whether seg_len rather than the stride is
+ negative) because it is possible for the absolute size of the
+ segment to overflow the range of a ssize_t.
+
+ Keeping the pointer_plus outside of the cond_expr should allow
+ the cond_exprs to be shared with other alias checks. */
+ tree indicator = dr_direction_indicator (d.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
+ DR_OFFSET (d.dr));
+ addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
+ tree seg_len
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
+
+ tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len, size_zero_node);
+ tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ size_zero_node, seg_len);
+ max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
+ size_int (d.access_size - align));
+
+ *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
+ *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
+}
+
+/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
+ storing the condition in *COND_EXPR. The fallback is to generate a
+ a test that the two accesses do not overlap:
+
+ end_a <= start_b || end_b <= start_a. */
+
+static void
+create_intersect_range_checks (class loop *loop, tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
*cond_expr = NULL_TREE;
- if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
+ if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
+ return;
+
+ if (create_ifn_alias_checks (cond_expr, alias_pair))
+ return;
+
+ if (create_waw_or_war_checks (cond_expr, alias_pair))
return;
- tree segment_length_a = dr_a.seg_len;
- tree segment_length_b = dr_b.seg_len;
- tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
- tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
- tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
-
- offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
- offset_a, DR_INIT (dr_a.dr));
- offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
- offset_b, DR_INIT (dr_b.dr));
- addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
- addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
-
- tree seg_a_min = addr_base_a;
- tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
- /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
- bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
- [a, a+12) */
- if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+ unsigned HOST_WIDE_INT min_align;
+ tree_code cmp_code;
+ /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
+ are equivalent. This is just an optimization heuristic. */
+ if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
+ && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
- seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
- seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+ /* In this case adding access_size to seg_len is likely to give
+ a simple X * step, where X is either the number of scalar
+ iterations or the vectorization factor. We're better off
+ keeping that, rather than subtracting an alignment from it.
+
+ In this case the maximum values are exclusive and so there is
+ no alias if the maximum of one segment equals the minimum
+ of another. */
+ min_align = 0;
+ cmp_code = LE_EXPR;
}
-
- tree seg_b_min = addr_base_b;
- tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
- if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+ else
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
- seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
- seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+ /* Calculate the minimum alignment shared by all four pointers,
+ then arrange for this alignment to be subtracted from the
+ exclusive maximum values to get inclusive maximum values.
+ This "- min_align" is cumulative with a "+ access_size"
+ in the calculation of the maximum values. In the best
+ (and common) case, the two cancel each other out, leaving
+ us with an inclusive bound based only on seg_len. In the
+ worst case we're simply adding a smaller number than before.
+
+ Because the maximum values are inclusive, there is an alias
+ if the maximum value of one segment is equal to the minimum
+ value of the other. */
+ min_align = MIN (dr_a.align, dr_b.align);
+ cmp_code = LT_EXPR;
}
+
+ tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
+ get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
+ get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
+
*cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
- fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+ fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
+ fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
/* Create a conditional expression that represents the run-time checks for
that controls which version of the loop gets executed at runtime. */
void
-create_runtime_alias_checks (struct loop *loop,
+create_runtime_alias_checks (class loop *loop,
vec<dr_with_seg_len_pair_t> *alias_pairs,
tree * cond_expr)
{
tree part_cond_expr;
- for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
+ fold_defer_overflow_warnings ();
+ dr_with_seg_len_pair_t *alias_pair;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
{
- const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
- const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
-
+ gcc_assert (alias_pair->flags);
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "create runtime check for data references ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "create runtime check for data references %T and %T\n",
+ DR_REF (alias_pair->first.dr),
+ DR_REF (alias_pair->second.dr));
/* Create condition expression for each pair data references. */
- create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
+ create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
*cond_expr, part_cond_expr);
else
*cond_expr = part_cond_expr;
}
+ fold_undefer_and_ignore_overflow_warnings ();
}
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
/* Returns true if the address of OBJ is invariant in LOOP. */
static bool
-object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
+object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
while (handled_component_p (obj))
{
if (TREE_CODE (obj) == ARRAY_REF)
{
- /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
- need to check the stride and the lower bound of the reference. */
- if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
- loop->num)
- || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
- loop->num))
- return false;
+ for (int i = 1; i < 4; ++i)
+ if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
+ loop->num))
+ return false;
}
else if (TREE_CODE (obj) == COMPONENT_REF)
{
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
- bool loop_nest)
+ class loop *loop_nest)
{
tree addr_a = DR_BASE_OBJECT (a);
tree addr_b = DR_BASE_OBJECT (b);
if (!loop_nest)
{
aff_tree off1, off2;
- widest_int size1, size2;
+ poly_widest_int size1, size2;
get_inner_reference_aff (DR_REF (a), &off1, &size1);
get_inner_reference_aff (DR_REF (b), &off2, &size2);
aff_combination_scale (&off1, -1);
if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
&& (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
+ /* For cross-iteration dependences the cliques must be valid for the
+ whole loop, not just individual iterations. */
+ && (!loop_nest
+ || MR_DEPENDENCE_CLIQUE (addr_a) == 1
+ || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
&& MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
&& MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
return false;
}
/* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b, loop_nest.exists ()))
+ if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
{
DDR_ARE_DEPENDENT (res) = chrec_known;
return res;
DDR_ARE_DEPENDENT (res) = NULL_TREE;
DDR_SUBSCRIPTS (res).create (full_seq.length);
DDR_LOOP_NEST (res) = loop_nest;
- DDR_INNER_LOOP (res) = 0;
DDR_SELF_REFERENCE (res) = false;
for (i = 0; i < full_seq.length; ++i)
conflict_function *ret = XCNEW (conflict_function);
va_list ap;
- gcc_assert (0 < n && n <= MAX_DIM);
+ gcc_assert (n > 0 && n <= MAX_DIM);
va_start (ap, n);
ret->n = n;
chrec_dont_know. */
static tree
-max_stmt_executions_tree (struct loop *loop)
+max_stmt_executions_tree (class loop *loop)
{
widest_int nit;
{
if (value0 == false)
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
{
HOST_WIDE_INT numiter;
- struct loop *loop = get_chrec_loop (chrec_b);
+ class loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
tmp = fold_build2 (EXACT_DIV_EXPR, type,
}
else
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
{
HOST_WIDE_INT numiter;
- struct loop *loop = get_chrec_loop (chrec_b);
+ class loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
mat[i][j] = (i == j) ? 1 : 0;
}
-/* Return the first nonzero element of vector VEC1 between START and N.
- We must have START <= N. Returns N if VEC1 is the zero vector. */
+/* Return the index of the first nonzero element of vector VEC1 between
+ START and N. We must have START <= N.
+ Returns N if VEC1 is the zero vector. */
static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
R2 = R2 + CONST1 * R1. */
static void
-lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
+ lambda_int const1)
{
int i;
static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
- int size, int const1)
+ int size, lambda_int const1)
{
int i;
{
while (S[i][j] != 0)
{
- int sigma, factor, a, b;
+ lambda_int sigma, factor, a, b;
a = S[i-1][j];
b = S[i][j];
sigma = (a * b < 0) ? -1: 1;
- a = abs (a);
- b = abs (b);
+ a = abs_hwi (a);
+ b = abs_hwi (b);
factor = sigma * (a / b);
lambda_matrix_row_add (S, n, i, i-1, -factor);
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
if (niter > 0)
{
- HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
- FLOOR_DIV (niter_b - j0, j1));
- HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
-
/* If the overlap occurs outside of the bounds of the
loop, there is no dependence. */
if (x1 >= niter_a || y1 >= niter_b)
*last_conflicts = integer_zero_node;
goto end_analyze_subs_aa;
}
+
+ /* max stmt executions can get quite large, avoid
+ overflows by using wide ints here. */
+ widest_int tau2
+ = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
+ wi::sdiv_floor (wi::sub (niter_b, j0), j1));
+ widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
+ if (wi::min_precision (last_conflict, SIGNED)
+ <= TYPE_PRECISION (integer_type_node))
+ *last_conflicts
+ = build_int_cst (integer_type_node,
+ last_conflict.to_shwi ());
else
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ *last_conflicts = chrec_dont_know;
}
else
*last_conflicts = chrec_dont_know;
conflict_function **overlaps_a,
conflict_function **overlaps_b,
tree *last_conflicts,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
tree type, difference;
}
else if (evolution_function_is_constant_p (difference)
- /* For the moment, the following is verified:
- evolution_function_is_affine_multivariate_p (chrec_a,
- loop_nest->num) */
+ && evolution_function_is_affine_multivariate_p (chrec_a,
+ loop_nest->num)
&& !gcd_of_steps_may_divide_p (chrec_a, difference))
{
/* testsuite/.../ssa-chrec-33.c
dependence_stats.num_miv_independent++;
}
- else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
- && !chrec_contains_symbols (chrec_a)
- && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
- && !chrec_contains_symbols (chrec_b))
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
+ && !chrec_contains_symbols (chrec_a, loop_nest)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
+ && !chrec_contains_symbols (chrec_b, loop_nest))
{
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
tree chrec_b,
conflict_function **overlap_iterations_a,
conflict_function **overlap_iterations_b,
- tree *last_conflicts, struct loop *loop_nest)
+ tree *last_conflicts, class loop *loop_nest)
{
unsigned int lnn = loop_nest->num;
{
unsigned i;
lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
+ class loop *loop = DDR_LOOP_NEST (ddr)[0];
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
{
return false;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in loop nest nested in the loop, we
+ would have more number of access functions than number of
+ loops. Skip access functions of loops not in the loop nest.
+
+ See PR89725 for more information. */
+ if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
+ continue;
+
dist = int_cst_value (SUB_DISTANCE (subscript));
index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
*index_carry = MIN (index, *index_carry);
unsigned i;
int index_carry = DDR_NB_LOOPS (ddr);
subscript *sub;
+ class loop *loop = DDR_LOOP_NEST (ddr)[0];
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
{
if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
{
- if (!evolution_function_is_univariate_p (access_fun))
+ if (!evolution_function_is_univariate_p (access_fun, loop->num))
{
if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
{
return;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in loop nest nested in the loop, we
+ would have more number of access functions than number of
+ loops. Skip access functions of loops not in the loop nest.
+
+ See PR89725 for more information. */
+ if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
+ loop))
+ continue;
+
index_carry = MIN (index_carry,
index_in_loop_nest (CHREC_VARIABLE (access_fun),
DDR_LOOP_NEST (ddr)));
{
lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- dist_v[DDR_INNER_LOOP (ddr)] = 1;
+ dist_v[0] = 1;
save_dist_v (ddr, dist_v);
}
static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
bool init_b = false;
int index_carry = DDR_NB_LOOPS (ddr);
static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
unsigned int a_index, unsigned int b_index,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
unsigned int i;
tree last_conflicts;
static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
dependence_stats.num_dependence_dependent++;
static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
- const struct loop *loop_nest)
+ const class loop *loop_nest)
{
unsigned int i;
vec<tree> fns = DR_ACCESS_FNS (a);
void
compute_affine_dependence (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
struct data_reference *dra = DDR_A (ddr);
struct data_reference *drb = DDR_B (ddr);
unsigned int i, j;
if ((int) datarefs.length ()
- > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
+ > param_loop_max_datarefs_for_datadeps)
{
struct data_dependence_relation *ddr;
{
case IFN_GOMP_SIMD_LANE:
{
- struct loop *loop = gimple_bb (stmt)->loop_father;
+ class loop *loop = gimple_bb (stmt)->loop_father;
tree uid = gimple_call_arg (stmt, 0);
gcc_assert (TREE_CODE (uid) == SSA_NAME);
if (loop == NULL
reference, returns false, otherwise returns true. NEST is the outermost
loop of the loop nest in which the references should be analyzed. */
-bool
-find_data_references_in_stmt (struct loop *nest, gimple *stmt,
+opt_result
+find_data_references_in_stmt (class loop *nest, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
auto_vec<data_ref_loc, 2> references;
data_ref_loc *ref;
- bool ret = true;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
- return false;
+ return opt_result::failure_at (stmt, "statement clobbers memory: %G",
+ stmt);
FOR_EACH_VEC_ELT (references, i, ref)
{
- dr = create_data_ref (nest, loop_containing_stmt (stmt), ref->ref,
+ dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
+ loop_containing_stmt (stmt), ref->ref,
stmt, ref->is_read, ref->is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
}
- return ret;
+ return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS. If there is an
should be analyzed. */
bool
-graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
+graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
difficult case, returns NULL_TREE otherwise. */
tree
-find_data_references_in_bb (struct loop *loop, basic_block bb,
+find_data_references_in_bb (class loop *loop, basic_block bb,
vec<data_reference_p> *datarefs)
{
gimple_stmt_iterator bsi;
arithmetic as if they were array accesses, etc. */
tree
-find_data_references_in_loop (struct loop *loop,
+find_data_references_in_loop (class loop *loop,
vec<data_reference_p> *datarefs)
{
basic_block bb, *bbs;
return alignment;
}
+/* If BASE is a pointer-typed SSA name, try to find the object that it
+ is based on. Return this object X on success and store the alignment
+ in bytes of BASE - &X in *ALIGNMENT_OUT. */
+
+static tree
+get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
+{
+ if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ gimple *def = SSA_NAME_DEF_STMT (base);
+ base = analyze_scalar_evolution (loop_containing_stmt (def), base);
+
+ /* Peel chrecs and record the minimum alignment preserved by
+ all steps. */
+ unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ while (TREE_CODE (base) == POLYNOMIAL_CHREC)
+ {
+ unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
+ alignment = MIN (alignment, step_alignment);
+ base = CHREC_LEFT (base);
+ }
+
+ /* Punt if the expression is too complicated to handle. */
+ if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ /* The only useful cases are those for which a dereference folds to something
+ other than an INDIRECT_REF. */
+ tree ref_type = TREE_TYPE (TREE_TYPE (base));
+ tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
+ if (!ref)
+ return NULL_TREE;
+
+ /* Analyze the base to which the steps we peeled were applied. */
+ poly_int64 bitsize, bitpos, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset;
+ base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ return NULL_TREE;
+
+ /* Restrict the alignment to that guaranteed by the offsets. */
+ unsigned int bytepos_alignment = known_alignment (bytepos);
+ if (bytepos_alignment != 0)
+ alignment = MIN (alignment, bytepos_alignment);
+ if (offset)
+ {
+ unsigned int offset_alignment = highest_pow2_factor (offset);
+ alignment = MIN (alignment, offset_alignment);
+ }
+
+ *alignment_out = alignment;
+ return base;
+}
+
+/* Return the object whose alignment would need to be changed in order
+ to increase the alignment of ADDR. Store the maximum achievable
+ alignment in *MAX_ALIGNMENT. */
+
+tree
+get_base_for_alignment (tree addr, unsigned int *max_alignment)
+{
+ tree base = get_base_for_alignment_1 (addr, max_alignment);
+ if (base)
+ return base;
+
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ return addr;
+}
+
/* Recursive helper function. */
static bool
-find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
+find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
/* Inner loops of the nest should not contain siblings. Example:
when there are two consecutive loops,
appear in the classic distance vector. */
bool
-find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
+find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
loop_nest->safe_push (loop);
if (loop->inner)
COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
bool
-compute_data_dependences_for_loop (struct loop *loop,
+compute_data_dependences_for_loop (class loop *loop,
bool compute_self_and_read_read_dependences,
vec<loop_p> *loop_nest,
vec<data_reference_p> *datarefs,
free_data_ref (dr);
datarefs.release ();
}
+
+/* Common routine implementing both dr_direction_indicator and
+ dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
+ to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
+ Return the step as the indicator otherwise. */
+
+static tree
+dr_step_indicator (struct data_reference *dr, int useful_min)
+{
+ tree step = DR_STEP (dr);
+ if (!step)
+ return NULL_TREE;
+ STRIP_NOPS (step);
+ /* Look for cases where the step is scaled by a positive constant
+ integer, which will often be the access size. If the multiplication
+ doesn't change the sign (due to overflow effects) then we can
+ test the unscaled value instead. */
+ if (TREE_CODE (step) == MULT_EXPR
+ && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
+ && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
+ {
+ tree factor = TREE_OPERAND (step, 1);
+ step = TREE_OPERAND (step, 0);
+
+ /* Strip widening and truncating conversions as well as nops. */
+ if (CONVERT_EXPR_P (step)
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
+ step = TREE_OPERAND (step, 0);
+ tree type = TREE_TYPE (step);
+
+ /* Get the range of step values that would not cause overflow. */
+ widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
+ / wi::to_widest (factor));
+ widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
+ / wi::to_widest (factor));
+
+ /* Get the range of values that the unconverted step actually has. */
+ wide_int step_min, step_max;
+ if (TREE_CODE (step) != SSA_NAME
+ || get_range_info (step, &step_min, &step_max) != VR_RANGE)
+ {
+ step_min = wi::to_wide (TYPE_MIN_VALUE (type));
+ step_max = wi::to_wide (TYPE_MAX_VALUE (type));
+ }
+
+ /* Check whether the unconverted step has an acceptable range. */
+ signop sgn = TYPE_SIGN (type);
+ if (wi::les_p (minv, widest_int::from (step_min, sgn))
+ && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
+ {
+ if (wi::ge_p (step_min, useful_min, sgn))
+ return ssize_int (useful_min);
+ else if (wi::lt_p (step_max, 0, sgn))
+ return ssize_int (-1);
+ else
+ return fold_convert (ssizetype, step);
+ }
+ }
+ return DR_STEP (dr);
+}
+
+/* Return a value that is negative iff DR has a negative step. */
+
+tree
+dr_direction_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 0);
+}
+
+/* Return a value that is zero iff DR has a zero step. */
+
+tree
+dr_zero_step_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 1);
+}
+
+/* Return true if DR is known to have a nonnegative (but possibly zero)
+ step. */
+
+bool
+dr_known_forward_stride_p (struct data_reference *dr)
+{
+ tree indicator = dr_direction_indicator (dr);
+ tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ return neg_step_val && integer_zerop (neg_step_val);
+}