/* Data references and dependences detectors.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ Copyright (C) 2003-2020 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
-#include "params.h"
+#include "builtins.h"
+#include "tree-eh.h"
+#include "ssa.h"
+#include "internal-fn.h"
static struct datadep_stats
{
} dependence_stats;
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
- struct data_reference *,
- struct data_reference *,
- struct loop *);
+ unsigned int, unsigned int,
+ class loop *);
/* Returns true iff A divides B. */
static inline bool
return ((b % a) == 0);
}
+/* Return true if reference REF contains a union access. */
+
+static bool
+ref_contains_union_access_p (tree ref)
+{
+ while (handled_component_p (ref))
+ {
+ ref = TREE_OPERAND (ref, 0);
+ if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
+ || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
+ return true;
+ }
+ return false;
+}
+
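+/* As a hedged illustration (hypothetical declarations): given
+     union u { int i; float f; } *p;
+   a reference such as p->i makes the function above return true,
+   because the walk from the outermost component to the base object
+   passes through an object of UNION_TYPE; a plain struct access such
+   as s.a[i] never does.  */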
\f
/* Dump into FILE all the data references from DATAREFS. */
int i;
for (i = 0; i < n; i++)
- fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "%3d ", (int)vector[i]);
fprintf (outfile, "\n");
}
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
unsigned int i;
- struct loop *loopi;
+ class loop *loopi;
- for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
+ subscript *sub;
+ FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
{
fprintf (outf, " access_fn_A: ");
- print_generic_stmt (outf, DR_ACCESS_FN (dra, i));
+ print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
fprintf (outf, " access_fn_B: ");
- print_generic_stmt (outf, DR_ACCESS_FN (drb, i));
- dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
+ print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
+ dump_subscript (outf, sub);
}
- fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
fprintf (outf, " loop nest: (");
FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
fprintf (outf, "%d ", loopi->num);
dump_ddrs (stderr, ddrs);
}
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit);
+
/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
(the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
constant of type ssizetype, and returns true. If we cannot do this
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
- tree *var, tree *off)
+ tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit)
{
tree var0, var1;
tree off0, off1;
/* FALLTHROUGH */
case PLUS_EXPR:
case MINUS_EXPR:
- split_constant_offset (op0, &var0, &off0);
- split_constant_offset (op1, &var1, &off1);
+ if (TREE_CODE (op1) == INTEGER_CST)
+ {
+ split_constant_offset (op0, &var0, &off0, cache, limit);
+ *var = var0;
+ *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
+ return true;
+ }
+ split_constant_offset (op0, &var0, &off0, cache, limit);
+ split_constant_offset (op1, &var1, &off1, cache, limit);
*var = fold_build2 (code, type, var0, var1);
*off = size_binop (ocode, off0, off1);
return true;
if (TREE_CODE (op1) != INTEGER_CST)
return false;
- split_constant_offset (op0, &var0, &off0);
+ split_constant_offset (op0, &var0, &off0, cache, limit);
*var = fold_build2 (MULT_EXPR, type, var0, op1);
*off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
return true;
case ADDR_EXPR:
{
tree base, poffset;
- HOST_WIDE_INT pbitsize, pbitpos;
+ poly_int64 pbitsize, pbitpos, pbytepos;
machine_mode pmode;
int punsignedp, preversep, pvolatilep;
= get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
&punsignedp, &preversep, &pvolatilep);
- if (pbitpos % BITS_PER_UNIT != 0)
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
return false;
base = build_fold_addr_expr (base);
- off0 = ssize_int (pbitpos / BITS_PER_UNIT);
+ off0 = ssize_int (pbytepos);
if (poffset)
{
- split_constant_offset (poffset, &poffset, &off1);
+ split_constant_offset (poffset, &poffset, &off1, cache, limit);
off0 = size_binop (PLUS_EXPR, off0, off1);
if (POINTER_TYPE_P (TREE_TYPE (base)))
base = fold_build_pointer_plus (base, poffset);
if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
return false;
- var0 = gimple_assign_rhs1 (def_stmt);
subcode = gimple_assign_rhs_code (def_stmt);
+
+ /* We are using a cache to avoid un-CSEing large amounts of code. */
+ bool use_cache = false;
+ if (!has_single_use (op0)
+ && (subcode == POINTER_PLUS_EXPR
+ || subcode == PLUS_EXPR
+ || subcode == MINUS_EXPR
+ || subcode == MULT_EXPR
+ || subcode == ADDR_EXPR
+ || CONVERT_EXPR_CODE_P (subcode)))
+ {
+ use_cache = true;
+ bool existed;
+ std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
+ if (existed)
+ {
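+	  /* A cached zero offset is the sentinel installed below before
+	     recursing (and left in place on failure): this function only
+	     returns true with a nonzero *OFF, so a zero entry means "no
+	     useful split".  */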
+ if (integer_zerop (e.second))
+ return false;
+ *var = e.first;
+ *off = e.second;
+ return true;
+ }
+ e = std::make_pair (op0, ssize_int (0));
+ }
+
+ if (*limit == 0)
+ return false;
+ --*limit;
+
+ var0 = gimple_assign_rhs1 (def_stmt);
var1 = gimple_assign_rhs2 (def_stmt);
- return split_constant_offset_1 (type, var0, subcode, var1, var, off);
+ bool res = split_constant_offset_1 (type, var0, subcode, var1,
+ var, off, cache, limit);
+ if (res && use_cache)
+ *cache.get (op0) = std::make_pair (*var, *off);
+ return res;
}
CASE_CONVERT:
{
- /* We must not introduce undefined overflow, and we must not change the value.
- Hence we're okay if the inner type doesn't overflow to start with
- (pointer or signed), the outer type also is an integer or pointer
- and the outer precision is at least as large as the inner. */
+ /* We must not introduce undefined overflow, and we must not change
+ the value. Hence we're okay if the inner type doesn't overflow
+ to start with (pointer or signed), the outer type also is an
+ integer or pointer and the outer precision is at least as large
+ as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ value_range_kind vr_type = get_range_info (tmp_var, &var_min,
+ &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ signop sgn = TYPE_SIGN (itype);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max, var_nonzero,
+ sgn) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ wi::overflow_type overflow[2];
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off, cache, limit);
*var = fold_convert (type, var0);
return true;
}
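
/* A hedged worked example with hypothetical SSA names: for
   (sizetype) op0 with op0 = i_1 + 4, where the unsigned int i_1 has a
   known range of [0, 100], the addition overflows at neither end of
   the range ([4, 104] fits in 32 bits), so the conversion can be split
   into (sizetype) i_1 + 4.  Overflow at exactly one end of the range
   would make the split unsafe, and is rejected above.  */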
/* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
will be ssizetype. */
-void
-split_constant_offset (tree exp, tree *var, tree *off)
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache,
+ unsigned *limit)
{
- tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
+ tree type = TREE_TYPE (exp), op0, op1, e, o;
enum tree_code code;
*var = exp;
*off = ssize_int (0);
- STRIP_NOPS (exp);
if (tree_is_chrec (exp)
|| get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
return;
- otype = TREE_TYPE (exp);
code = TREE_CODE (exp);
extract_ops_from_tree (exp, &code, &op0, &op1);
- if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
+ if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
{
- *var = fold_convert (type, e);
+ *var = e;
*off = o;
}
}
+void
+split_constant_offset (tree exp, tree *var, tree *off)
+{
+ unsigned limit = param_ssa_name_def_chain_limit;
+ static hash_map<tree, std::pair<tree, tree> > *cache;
+ if (!cache)
+ cache = new hash_map<tree, std::pair<tree, tree> > (37);
+ split_constant_offset (exp, var, off, *cache, &limit);
+ cache->empty ();
+}
+
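+/* An illustration of the exported entry point above (hypothetical SSA
+   names): for EXP = a_1 + 12 it sets *VAR = a_1 and *OFF = 12; when no
+   split is possible, EXP is returned unchanged in *VAR with *OFF = 0.
+   The walk through SSA definitions is bounded by
+   param_ssa_name_def_chain_limit and memoized in the cache so that
+   large shared expressions are not effectively un-CSEd.  */
+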
/* Returns the address ADDR of an object in a canonical shape (without nop
casts, and with type of pointer to the object). */
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyze the behavior of memory reference REF. There are two modes:
+/* Analyze the behavior of memory reference REF within STMT.
+ There are two modes:
- BB analysis. In this case we simply split the address into base,
init and offset components, without reference to any containing loop.
Return true if the analysis succeeded and store the results in DRB if so.
BB analysis can only fail for bitfield or reversed-storage accesses. */
-bool
+opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
- struct loop *loop)
+ class loop *loop, const gimple *stmt)
{
- HOST_WIDE_INT pbitsize, pbitpos;
+ poly_int64 pbitsize, pbitpos;
tree base, poffset;
machine_mode pmode;
int punsignedp, preversep, pvolatilep;
&punsignedp, &preversep, &pvolatilep);
gcc_assert (base != NULL_TREE);
- if (pbitpos % BITS_PER_UNIT != 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: bit offset alignment.\n");
- return false;
- }
+ poly_int64 pbytepos;
+ if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
+ return opt_result::failure_at (stmt,
+ "failed: bit offset alignment.\n");
if (preversep)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: reverse storage order.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: reverse storage order.\n");
+
+ /* Calculate the alignment and misalignment for the inner reference. */
+ unsigned HOST_WIDE_INT bit_base_misalignment;
+ unsigned int bit_base_alignment;
+ get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
+
+ /* There are no bitfield references remaining in BASE, so the values
+ we got back must be whole bytes. */
+ gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
+ && bit_base_misalignment % BITS_PER_UNIT == 0);
+ unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
+ poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
if (TREE_CODE (base) == MEM_REF)
{
if (!integer_zerop (TREE_OPERAND (base, 1)))
{
- offset_int moff = mem_ref_offset (base);
+ /* Subtract MOFF from the base and add it to POFFSET instead.
+ Adjust the misalignment to reflect the amount we subtracted. */
+ poly_offset_int moff = mem_ref_offset (base);
+ base_misalignment -= moff.force_shwi ();
tree mofft = wide_int_to_tree (sizetype, moff);
if (!poffset)
poffset = mofft;
if (in_loop)
{
if (!simple_iv (loop, loop, base, &base_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of base is not affine.\n");
}
else
{
offset_iv.step = ssize_int (0);
}
else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of offset is not affine.\n");
}
- init = ssize_int (pbitpos / BITS_PER_UNIT);
+ init = ssize_int (pbytepos);
+
+ /* Subtract any constant component from the base and add it to INIT instead.
+ Adjust the misalignment to reflect the amount we subtracted. */
split_constant_offset (base_iv.base, &base_iv.base, &dinit);
- init = size_binop (PLUS_EXPR, init, dinit);
+ init = size_binop (PLUS_EXPR, init, dinit);
+ base_misalignment -= TREE_INT_CST_LOW (dinit);
+
split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
- init = size_binop (PLUS_EXPR, init, dinit);
+ init = size_binop (PLUS_EXPR, init, dinit);
step = size_binop (PLUS_EXPR,
fold_convert (ssizetype, base_iv.step),
fold_convert (ssizetype, offset_iv.step));
- drb->base_address = canonicalize_base_object_address (base_iv.base);
+ base = canonicalize_base_object_address (base_iv.base);
+
+ /* See if get_pointer_alignment can guarantee a higher alignment than
+ the one we calculated above. */
+ unsigned HOST_WIDE_INT alt_misalignment;
+ unsigned int alt_alignment;
+ get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
+ /* As above, these values must be whole bytes. */
+ gcc_assert (alt_alignment % BITS_PER_UNIT == 0
+ && alt_misalignment % BITS_PER_UNIT == 0);
+ alt_alignment /= BITS_PER_UNIT;
+ alt_misalignment /= BITS_PER_UNIT;
+
+ if (base_alignment < alt_alignment)
+ {
+ base_alignment = alt_alignment;
+ base_misalignment = alt_misalignment;
+ }
+
+ drb->base_address = base;
drb->offset = fold_convert (ssizetype, offset_iv.base);
drb->init = init;
drb->step = step;
- drb->aligned_to = size_int (highest_pow2_factor (offset_iv.base));
+ if (known_misalignment (base_misalignment, base_alignment,
+ &drb->base_misalignment))
+ drb->base_alignment = base_alignment;
+ else
+ {
+ drb->base_alignment = known_alignment (base_misalignment);
+ drb->base_misalignment = 0;
+ }
+ drb->offset_alignment = highest_pow2_factor (offset_iv.base);
+ drb->step_alignment = highest_pow2_factor (step);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "success.\n");
- return true;
+ return opt_result::success ();
+}
+
+/* Return true if OP is a valid component reference for a DR access
+ function. This accepts a subset of what handled_component_p accepts. */
+
+static bool
+access_fn_component_p (tree op)
+{
+ switch (TREE_CODE (op))
+ {
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ case ARRAY_REF:
+ return true;
+
+ case COMPONENT_REF:
+ return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
+
+ default:
+ return false;
+ }
}
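
/* For example (hypothetical reference): in a.b[i], both the ARRAY_REF
   and the COMPONENT_REF with a RECORD_TYPE parent qualify, whereas a
   BIT_FIELD_REF or a VIEW_CONVERT_EXPR, though accepted by
   handled_component_p, does not.  */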
/* Determines the base object and the list of indices of memory reference
- DR, analyzed in LOOP and instantiated in loop nest NEST. */
+ DR, analyzed in LOOP and instantiated before NEST. */
static void
-dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
+dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
vec<tree> access_fns = vNULL;
tree ref, op;
tree base, off, access_fn;
- basic_block before_loop;
/* If analyzing a basic-block there are no indices to analyze
and thus no access functions. */
}
ref = DR_REF (dr);
- before_loop = block_before_loop (nest);
/* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
into a two element array with a constant index. The base is
access_fns.safe_push (integer_one_node);
}
- /* Analyze access functions of dimensions we know to be independent. */
+ /* Analyze access functions of dimensions we know to be independent.
+ The list of component references handled here should be kept in
+ sync with access_fn_component_p. */
while (handled_component_p (ref))
{
if (TREE_CODE (ref) == ARRAY_REF)
{
op = TREE_OPERAND (ref, 1);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
access_fns.safe_push (access_fn);
}
else if (TREE_CODE (ref) == COMPONENT_REF
{
op = TREE_OPERAND (ref, 0);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
{
tree orig_type;
if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
&& TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
&& !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
- rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
+ rem = wi::mod_trunc
+ (wi::to_wide (off),
+ wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
+ SIGNED);
else
/* If we can't compute the remainder simply force the initial
condition to zero. */
- rem = off;
- off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
+ rem = wi::to_wide (off);
+ off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
/* And finally replace the initial condition. */
access_fn = chrec_replace_initial_condition
free (dr);
}
-/* Analyzes memory reference MEMREF accessed in STMT. The reference
- is read if IS_READ is true, write otherwise. Returns the
- data_reference description of MEMREF. NEST is the outermost loop
- in which the reference should be instantiated, LOOP is the loop in
- which the data reference should be analyzed. */
+/* Analyze memory reference MEMREF, which is accessed in STMT.
+ The reference is a read if IS_READ is true, otherwise it is a write.
+ IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
+ within STMT, i.e. that it might not occur even if STMT is executed
+ and runs to completion.
+
+ Return the data_reference description of MEMREF. NEST is the outermost
+ loop in which the reference should be instantiated, LOOP is the loop
+ in which the data reference should be analyzed. */
struct data_reference *
-create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
- bool is_read)
+create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
+ bool is_read, bool is_conditional_in_stmt)
{
struct data_reference *dr;
DR_STMT (dr) = stmt;
DR_REF (dr) = memref;
DR_IS_READ (dr) = is_read;
+ DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
dr_analyze_innermost (&DR_INNERMOST (dr), memref,
- nest != NULL ? loop : NULL);
+ nest != NULL ? loop : NULL, stmt);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
fprintf (dump_file, "\n\tstep: ");
print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
- fprintf (dump_file, "\n\taligned to: ");
- print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
+ fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
+ fprintf (dump_file, "\n\tbase misalignment: %d",
+ DR_BASE_MISALIGNMENT (dr));
+ fprintf (dump_file, "\n\toffset alignment: %d",
+ DR_OFFSET_ALIGNMENT (dr));
+ fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
fprintf (dump_file, "\n\tbase_object: ");
print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
fprintf (dump_file, "\n");
return dr;
}
-/* A helper function computes order between two tree epxressions T1 and T2.
+/* A helper function computes order between two tree expressions T1 and T2.
This is used in comparator functions sorting objects based on the order
of tree expressions. The function returns -1, 0, or 1. */
if (t2 == NULL)
return 1;
- STRIP_NOPS (t1);
- STRIP_NOPS (t2);
+ STRIP_USELESS_TYPE_CONVERSION (t1);
+ STRIP_USELESS_TYPE_CONVERSION (t2);
+ if (t1 == t2)
+ return 0;
- if (TREE_CODE (t1) != TREE_CODE (t2))
+ if (TREE_CODE (t1) != TREE_CODE (t2)
+ && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
code = TREE_CODE (t1);
switch (code)
{
- /* For const values, we can just use hash values for comparisons. */
case INTEGER_CST:
- case REAL_CST:
- case FIXED_CST:
+ return tree_int_cst_compare (t1, t2);
+
case STRING_CST:
- case COMPLEX_CST:
- case VECTOR_CST:
- {
- hashval_t h1 = iterative_hash_expr (t1, 0);
- hashval_t h2 = iterative_hash_expr (t2, 0);
- if (h1 != h2)
- return h1 < h2 ? -1 : 1;
- break;
- }
+ if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
+ return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
+ return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
+ TREE_STRING_LENGTH (t1));
case SSA_NAME:
- cmp = data_ref_compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
- if (cmp != 0)
- return cmp;
-
if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
break;
default:
+ if (POLY_INT_CST_P (t1))
+ return compare_sizes_for_sort (wi::to_poly_widest (t1),
+ wi::to_poly_widest (t2));
+
tclass = TREE_CODE_CLASS (code);
- /* For var-decl, we could compare their UIDs. */
+ /* For decls, compare their UIDs. */
if (tclass == tcc_declaration)
{
if (DECL_UID (t1) != DECL_UID (t2))
return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
break;
}
-
- /* For expressions with operands, compare their operands recursively. */
- for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
+ /* For expressions, compare their operands recursively. */
+ else if (IS_EXPR_CODE_CLASS (tclass))
{
- cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
- TREE_OPERAND (t2, i));
- if (cmp != 0)
- return cmp;
+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
+ {
+ cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
+ TREE_OPERAND (t2, i));
+ if (cmp != 0)
+ return cmp;
+ }
}
+ else
+ gcc_unreachable ();
}
return 0;
/* Return TRUE if it's possible to resolve data dependence DDR by runtime alias
check. */
-bool
-runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
+opt_result
+runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "consider run-time aliasing test between %T and %T\n",
+ DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
if (!speed_p)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported when optimizing "
- "for size.\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported when"
+ " optimizing for size.\n");
/* FORNOW: We don't support versioning with outer-loop in either
vectorization or loop distribution. */
if (loop != NULL && loop->inner != NULL)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for outer loop.\n");
- return false;
- }
-
- /* FORNOW: We don't support creating runtime alias tests for non-constant
- step. */
- if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
- || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for non-constant "
- "step\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported for"
+ " outer loop.\n");
- return true;
+ return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.
operator == (const dr_with_seg_len& d1,
const dr_with_seg_len& d2)
{
- return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
- DR_BASE_ADDRESS (d2.dr), 0)
- && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
- && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
- && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
+ return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
+ DR_BASE_ADDRESS (d2.dr), 0)
+ && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
+ && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
+ && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
+ && known_eq (d1.access_size, d2.access_size)
+ && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
return 0;
}
+/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
+
+static void
+dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
+{
+ dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
+ DR_REF (alias_pair->first.dr),
+ DR_REF (alias_pair->second.dr));
+
+ dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
+ alias_pair->first.seg_len);
+ if (!operand_equal_p (alias_pair->first.seg_len,
+ alias_pair->second.seg_len, 0))
+ dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
+
+ dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
+ dump_dec (MSG_NOTE, alias_pair->first.access_size);
+ if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
+ {
+ dump_printf (MSG_NOTE, " vs. ");
+ dump_dec (MSG_NOTE, alias_pair->second.access_size);
+ }
+
+ dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
+ alias_pair->first.align);
+ if (alias_pair->first.align != alias_pair->second.align)
+ dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
+
+ dump_printf (MSG_NOTE, "\n%sflags: ", indent);
+ if (alias_pair->flags & DR_ALIAS_RAW)
+ dump_printf (MSG_NOTE, " RAW");
+ if (alias_pair->flags & DR_ALIAS_WAR)
+ dump_printf (MSG_NOTE, " WAR");
+ if (alias_pair->flags & DR_ALIAS_WAW)
+ dump_printf (MSG_NOTE, " WAW");
+ if (alias_pair->flags & DR_ALIAS_ARBITRARY)
+ dump_printf (MSG_NOTE, " ARBITRARY");
+ if (alias_pair->flags & DR_ALIAS_SWAPPED)
+ dump_printf (MSG_NOTE, " SWAPPED");
+ if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
+ dump_printf (MSG_NOTE, " UNSWAPPED");
+ if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
+ dump_printf (MSG_NOTE, " MIXED_STEPS");
+ if (alias_pair->flags == 0)
+ dump_printf (MSG_NOTE, " <none>");
+ dump_printf (MSG_NOTE, "\n");
+}
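+
+/* Given the formats above, a dumped pair has a shape like the following
+   (all values hypothetical):
+
+       reference: *p_1 vs. *q_2
+       segment length: _5
+       access size: 4
+       alignment: 4
+       flags:  RAW  */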
+
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
FACTOR is the number of iterations with which each data reference is accessed.
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- unsigned HOST_WIDE_INT factor)
+ poly_uint64)
{
+ if (alias_pairs->is_empty ())
+ return;
+
+ /* Canonicalize each pair so that the base components are ordered wrt
+ data_ref_compare_tree. This allows the loop below to merge more
+ cases. */
+ unsigned int i;
+ dr_with_seg_len_pair_t *alias_pair;
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
+ {
+ data_reference_p dr_a = alias_pair->first.dr;
+ data_reference_p dr_b = alias_pair->second.dr;
+ int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
+ DR_BASE_ADDRESS (dr_b));
+ if (comp_res == 0)
+ comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
+ if (comp_res == 0)
+ comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
+ if (comp_res > 0)
+ {
+ std::swap (alias_pair->first, alias_pair->second);
+ alias_pair->flags |= DR_ALIAS_SWAPPED;
+ }
+ else
+ alias_pair->flags |= DR_ALIAS_UNSWAPPED;
+ }
+
/* Sort the collected data ref pairs so that we can scan them once to
combine all possible aliasing checks. */
alias_pairs->qsort (comp_dr_with_seg_len_pair);
/* Scan the sorted dr pairs and check if we can combine alias checks
of two neighboring dr pairs. */
- for (size_t i = 1; i < alias_pairs->length (); ++i)
+ unsigned int last = 0;
+ for (i = 1; i < alias_pairs->length (); ++i)
{
/* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
- dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
- *dr_b1 = &(*alias_pairs)[i-1].second,
- *dr_a2 = &(*alias_pairs)[i].first,
- *dr_b2 = &(*alias_pairs)[i].second;
+ dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
+ dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
+
+ dr_with_seg_len *dr_a1 = &alias_pair1->first;
+ dr_with_seg_len *dr_b1 = &alias_pair1->second;
+ dr_with_seg_len *dr_a2 = &alias_pair2->first;
+ dr_with_seg_len *dr_b2 = &alias_pair2->second;
/* Remove duplicate data ref pairs. */
if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "found equal ranges ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (i--);
+ dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
+ alias_pair1->flags |= alias_pair2->flags;
continue;
}
+ /* Assume that we won't be able to merge the pairs, then correct
+ if we do. */
+ last += 1;
+ if (last != i)
+ (*alias_pairs)[last] = (*alias_pairs)[i];
+
if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
{
/* We consider the case that DR_B1 and DR_B2 are the same memref,
std::swap (dr_a2, dr_b2);
}
+ poly_int64 init_a1, init_a2;
+ /* Only consider cases in which the distance between the initial
+ DR_A1 and the initial DR_A2 is known at compile time. */
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
DR_BASE_ADDRESS (dr_a2->dr), 0)
|| !operand_equal_p (DR_OFFSET (dr_a1->dr),
DR_OFFSET (dr_a2->dr), 0)
- || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
- || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
+ || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
+ || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
continue;
- /* Only merge const step data references. */
- if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
- || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
+ /* Don't combine if we can't tell which one comes first. */
+ if (!ordered_p (init_a1, init_a2))
continue;
- /* DR_A1 and DR_A2 must goes in the same direction. */
- if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
- != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
- continue;
+ /* Work out what the segment length would be if we did combine
+ DR_A1 and DR_A2:
- bool neg_step
- = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
+ - If DR_A1 and DR_A2 have equal lengths, that length is
+ also the combined length.
- /* We need to compute merged segment length at compilation time for
- dr_a1 and dr_a2, which is impossible if either one has non-const
- segment length. */
- if ((!tree_fits_uhwi_p (dr_a1->seg_len)
- || !tree_fits_uhwi_p (dr_a2->seg_len))
- && tree_int_cst_compare (DR_STEP (dr_a1->dr),
- DR_STEP (dr_a2->dr)) != 0)
- continue;
+ - If DR_A1 and DR_A2 both have negative "lengths", the combined
+ length is the lower bound on those lengths.
+
+ - If DR_A1 and DR_A2 both have positive lengths, the combined
+ length is the upper bound on those lengths.
+
+ Other cases are unlikely to give a useful combination.
+
+ The lengths both have sizetype, so the sign is taken from
+ the step instead. */
+ poly_uint64 new_seg_len = 0;
+ bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
+ dr_a2->seg_len, 0);
+ if (new_seg_len_p)
+ {
+ poly_uint64 seg_len_a1, seg_len_a2;
+ if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
+ || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
+ continue;
+
+ tree indicator_a = dr_direction_indicator (dr_a1->dr);
+ if (TREE_CODE (indicator_a) != INTEGER_CST)
+ continue;
+
+ tree indicator_b = dr_direction_indicator (dr_a2->dr);
+ if (TREE_CODE (indicator_b) != INTEGER_CST)
+ continue;
+
+ int sign_a = tree_int_cst_sgn (indicator_a);
+ int sign_b = tree_int_cst_sgn (indicator_b);
+
+ if (sign_a <= 0 && sign_b <= 0)
+ new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
+ else if (sign_a >= 0 && sign_b >= 0)
+ new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
+ else
+ continue;
+ }
+ /* At this point we're committed to merging the refs. */
/* Make sure dr_a1 starts left of dr_a2. */
- if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
- std::swap (*dr_a1, *dr_a2);
+ if (maybe_gt (init_a1, init_a2))
+ {
+ std::swap (*dr_a1, *dr_a2);
+ std::swap (init_a1, init_a2);
+ }
- bool do_remove = false;
- wide_int diff = wi::sub (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr));
- wide_int min_seg_len_b;
- tree new_seg_len;
+ /* The DR_Bs are equal, so only the DR_As can introduce
+ mixed steps. */
+ if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
+ alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
- if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
- min_seg_len_b = wi::abs (dr_b1->seg_len);
- else
- min_seg_len_b = wi::mul (factor, wi::abs (DR_STEP (dr_b1->dr)));
+ if (new_seg_len_p)
+ {
+ dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
+ new_seg_len);
+ dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
+ }
- /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
+ /* This is always positive due to the swap above. */
+ poly_uint64 diff = init_a2 - init_a1;
- Case A:
- check if the following condition is satisfied:
+ /* The new check will start at DR_A1. Make sure that its access
+ size encompasses the initial DR_A2. */
+ if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
+ {
+ dr_a1->access_size = upper_bound (dr_a1->access_size,
+ diff + dr_a2->access_size);
+ unsigned int new_align = known_alignment (dr_a1->access_size);
+ dr_a1->align = MIN (dr_a1->align, new_align);
+ }
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
+ alias_pair1->flags |= alias_pair2->flags;
+ last -= 1;
+ }
+ }
+ alias_pairs->truncate (last + 1);
- DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
+ /* Try to restore the original dr_with_seg_len order within each
+ dr_with_seg_len_pair_t. If we ended up combining swapped and
+ unswapped pairs into the same check, we have to invalidate any
+ RAW, WAR and WAW information for it. */
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "merged alias checks:\n");
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
+ {
+ unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
+ unsigned int swapped = (alias_pair->flags & swap_mask);
+ if (swapped == DR_ALIAS_SWAPPED)
+ std::swap (alias_pair->first, alias_pair->second);
+ else if (swapped != DR_ALIAS_UNSWAPPED)
+ alias_pair->flags |= DR_ALIAS_ARBITRARY;
+ alias_pair->flags &= ~swap_mask;
+ if (dump_enabled_p ())
+ dump_alias_pair (alias_pair, " ");
+ }
+}
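+
+/* A hedged worked example for the merging loop above (hypothetical
+   values): combining DR_A1 at INIT 0 with seg_len 16 and DR_A2 at
+   INIT 4 with seg_len 20, both with positive direction indicators,
+   gives DIFF = 4 and a combined segment of upper_bound (16, 20) = 20
+   starting at DR_A1; the access size is then raised so that the single
+   remaining check also covers the first access of DR_A2.  */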
- where DIFF = DR_A2_INIT - DR_A1_INIT. However,
- SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
- have to make a best estimation. We can get the minimum value
- of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
- then either of the following two conditions can guarantee the
- one above:
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
+ to optimize cases in which the references form a simple RAW, WAR or
+ WAW dependence.  */
- 1: DIFF <= MIN_SEG_LEN_B
- 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
- Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
- to take care of wrapping behavior in it.
+static bool
+create_ifn_alias_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
- Case B:
- If the left segment does not extend beyond the start of the
- right segment the new segment length is that of the right
- plus the segment distance. The condition is like:
+ /* Check for cases in which:
- DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
+ (a) we have a known RAW, WAR or WAW dependence
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
- Note 1: Case A.2 and B combined together effectively merges every
- dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
+ /* Make sure that both DRs access the same pattern of bytes,
+ with a constant length and step.  */
+ poly_uint64 seg_len;
+ if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
+ || !poly_int_tree_p (dr_a.seg_len, &seg_len)
+ || maybe_ne (dr_a.access_size, dr_b.access_size)
+ || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
+ || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
+ return false;
- Note 2: Above description is based on positive DR_STEP, we need to
- take care of negative DR_STEP for wrapping behavior. See PR80815
- for more information. */
- if (neg_step)
- {
- /* Adjust diff according to access size of both references. */
- tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
- tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
- diff = wi::add (diff, wi::sub (size_a2, size_a1));
- /* Case A.1. */
- if (wi::leu_p (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || (tree_fits_uhwi_p (dr_a2->seg_len)))
- {
- if (tree_fits_uhwi_p (dr_a1->seg_len)
- && tree_fits_uhwi_p (dr_a2->seg_len))
- new_seg_len
- = wide_int_to_tree (sizetype,
- wi::umin (wi::sub (dr_a1->seg_len,
- diff),
- dr_a2->seg_len));
- else
- new_seg_len
- = size_binop (MINUS_EXPR, dr_a2->seg_len,
- wide_int_to_tree (sizetype, diff));
+ unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
+ tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
+ tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
+
+ /* See whether the target supports what we want to do.  WAW checks are
+ equivalent to WAR checks here. */
+ internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
+ ? IFN_CHECK_RAW_PTRS
+ : IFN_CHECK_WAR_PTRS);
+ unsigned int align = MIN (dr_a.align, dr_b.align);
+ poly_uint64 full_length = seg_len + bytes;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ {
+ full_length = seg_len + dr_a.access_size;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ return false;
+ }
- dr_a2->seg_len = new_seg_len;
- do_remove = true;
- }
- }
- else
- {
- /* Case A.1. */
- if (wi::leu_p (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || (tree_fits_uhwi_p (dr_a1->seg_len)))
- {
- if (tree_fits_uhwi_p (dr_a1->seg_len)
- && tree_fits_uhwi_p (dr_a2->seg_len))
- new_seg_len
- = wide_int_to_tree (sizetype,
- wi::umax (wi::add (dr_a2->seg_len,
- diff),
- dr_a1->seg_len));
- else
- new_seg_len
- = size_binop (PLUS_EXPR, dr_a2->seg_len,
- wide_int_to_tree (sizetype, diff));
+ /* Commit to using this form of test. */
+ addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
- dr_a1->seg_len = new_seg_len;
- do_remove = true;
- }
- }
+ addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
- if (do_remove)
- {
- if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (neg_step ? i - 1 : i);
- i--;
- }
- }
+ *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
+ ifn, boolean_type_node,
+ 4, addr_a, addr_b,
+ size_int (full_length),
+ size_int (align));
+
+ if (dump_enabled_p ())
+ {
+ if (ifn == IFN_CHECK_RAW_PTRS)
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
+ else
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
}
+ return true;
}
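
/* The condition built above is a single internal-function call,

     IFN_CHECK_RAW_PTRS (addr_a, addr_b, full_length, align)

   or its IFN_CHECK_WAR_PTRS counterpart, which evaluates to true when
   the two references are independent; targets that provide a suitable
   pointer-check instruction can expand it directly.  */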
-/* Given LOOP's two data references and segment lengths described by DR_A
- and DR_B, create expression checking if the two addresses ranges intersect
- with each other based on index of the two addresses. This can only be
- done if DR_A and DR_B referring to the same (array) object and the index
- is the only difference. For example:
+/* Try to generate a runtime condition that is true if ALIAS_PAIR is
+ free of aliases, using a condition based on index values instead
+ of a condition based on addresses. Return true on success,
+ storing the condition in *COND_EXPR.
+
+ This can only be done if the two data references in ALIAS_PAIR access
+ the same array object and the index is the only difference. For example,
+ if the two data references are DR_A and DR_B:
DR_A DR_B
data-ref arr[i] arr[j]
We can create expression based on index rather than address:
- (i_0 + 4 < j_0 || j_0 + 4 < i_0)
+ (unsigned) (i_0 - j_0 + 3) <= 6
+
+ i.e. the indices are less than 4 apart.
Note that the evolution step of the index needs to be considered in the comparison.  */
static bool
-create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
- const dr_with_seg_len& dr_a,
- const dr_with_seg_len& dr_b)
+create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
{
- if (integer_zerop (DR_STEP (dr_a.dr))
+ const dr_with_seg_len &dr_a = alias_pair.first;
+ const dr_with_seg_len &dr_b = alias_pair.second;
+ if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
+ || integer_zerop (DR_STEP (dr_a.dr))
|| integer_zerop (DR_STEP (dr_b.dr))
|| DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
return false;
- if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
+ poly_uint64 seg_len1, seg_len2;
+ if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
+ || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
return false;
if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
- unsigned HOST_WIDE_INT abs_step
- = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
+ unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
+ if (neg_step)
+ {
+ abs_step = -abs_step;
+ seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
+ seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
+ }
- unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
- unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
/* Infer the number of iterations with which the memory segment is accessed
by DR.  In other words, alias is checked if the memory segment accessed
by DR_A in some iterations intersects with the memory segment accessed
by DR_B in the same number of iterations.
Note that segment length is a linear function of the number of iterations
with DR_STEP as the coefficient.  */
- unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
- unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
+ poly_uint64 niter_len1, niter_len2;
+ if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
+ || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
+ return false;
+
+ /* Divide each access size by the byte step, rounding up. */
+ poly_uint64 niter_access1, niter_access2;
+ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+ abs_step, &niter_access1)
+ || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+ abs_step, &niter_access2))
+ return false;
+
+ bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
unsigned int i;
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
index of data reference. Like segment length, index length is
linear function of the number of iterations with index_step as
the coefficient, i.e, niter_len * idx_step. */
- tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
- build_int_cst (TREE_TYPE (min1),
- niter_len1));
- tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
- build_int_cst (TREE_TYPE (min2),
- niter_len2));
- tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
- tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
- /* Adjust ranges for negative step. */
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+ SIGNED);
if (neg_step)
- {
- min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
- CHREC_LEFT (access1), idx_step);
- min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
- CHREC_LEFT (access2), idx_step);
- }
- tree part_cond_expr
- = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
- fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
+ abs_idx_step = -abs_idx_step;
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+
+ gcc_assert (known_ge (idx_len1, 0)
+ && known_ge (idx_len2, 0)
+ && known_ge (idx_access1, 0)
+ && known_ge (idx_access2, 0));
+
+ /* Each access has the following pattern, with lengths measured
+ in units of INDEX:
+
+   <-- idx_len -->
+   <--- A: -ve step --->
+   +-----+-------+-----+-------+-----+
+   | n-1 | ..... |  0  | ..... | n-1 |
+   +-----+-------+-----+-------+-----+
+                 <--- B: +ve step --->
+                 <-- idx_len -->
+                    |
+                    min
+
+ where "n" is the number of scalar iterations covered by the segment
+ and where each access spans idx_access units.
+
+ A is the range of index values accessed when the step is negative,
+ B is the range when the step is positive.
+
+ When checking for general overlap, we need to test whether
+ the range:
+
+ [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
+
+ overlaps:
+
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+
+ where:
+
+ low_offsetN = +ve step ? 0 : -idx_lenN;
+ high_offsetN = +ve step ? idx_lenN : 0;
+
+ This is equivalent to testing whether:
+
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+
+ Converting this into a single test, there is an overlap if:
+
+ 0 <= min2 - min1 + bias <= limit
+
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+
+ Combining the tests requires limit to be computable in an unsigned
+ form of the index type; if it isn't, we fall back to the usual
+ pointer-based checks.
+
+ We can do better if DR_B is a write and if DR_A and DR_B are
+ well-ordered in both the original and the new code (see the
+ comment above the DR_ALIAS_* flags for details). In this case
+ we know that for each i in [0, n-1], the write performed by
+ access i of DR_B occurs after access numbers j<=i of DR_A in
+ both the original and the new code. Any write or anti
+ dependencies wrt those DR_A accesses are therefore maintained.
+
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
+
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move min1 along by
+ one iteration:
+
+ min1' = min1 + idx_step
+
+ and use the ranges:
+
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+
+ and:
+
+ [min2, min2 + idx_access2 - 1]
+
+ where:
+
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+ high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
+ if (waw_or_war_p)
+ idx_len1 -= abs_idx_step;
+
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+ if (!waw_or_war_p)
+ limit += idx_len2;
+
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
+ if (!wi::fits_to_tree_p (limit, utype))
+ return false;
+
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+ /* Equivalent to adding IDX_STEP to MIN1. */
+ if (waw_or_war_p)
+ bias -= wi::to_offset (idx_step);
+
+ tree subject = fold_build2 (MINUS_EXPR, utype,
+ fold_convert (utype, min2),
+ fold_convert (utype, min1));
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
+ wide_int_to_tree (utype, bias));
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+ wide_int_to_tree (utype, limit));
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
*cond_expr, part_cond_expr);
else
*cond_expr = part_cond_expr;
}
+ if (dump_enabled_p ())
+ {
+ if (waw_or_war_p)
+ dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
+ else
+ dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+ }
+ return true;
+}
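+
+/* Plugging the example from the function comment into the formulas
+   above (hypothetical values): idx_len1 = idx_len2 = 3,
+   idx_access1 = idx_access2 = 1 and a positive step give bias = 3 and
+   limit = 6, so the no-alias condition becomes
+   (unsigned) (min2 - min1 + 3) > 6, i.e. the accesses are independent
+   whenever the starting indices are at least 4 apart.  */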
+
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to optimize cases in which the second access
+ is a write and in which some overlap is valid. */
+
+static bool
+create_waw_or_war_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
+
+ /* Check for cases in which:
+
+ (a) DR_B is always a write;
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
+
+ /* Check for equal (but possibly variable) steps. */
+ tree step = DR_STEP (dr_a.dr);
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+ return false;
+
+ /* Make sure that we can operate on sizetype without loss of precision. */
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+ return false;
+
+ /* All addresses involved are known to have a common alignment ALIGN.
+ We can therefore subtract ALIGN from an exclusive endpoint to get
+ an inclusive endpoint. In the best (and common) case, ALIGN is the
+ same as the access sizes of both DRs, and so subtracting ALIGN
+ cancels out the addition of an access size. */
+ unsigned int align = MIN (dr_a.align, dr_b.align);
+ poly_uint64 last_chunk_a = dr_a.access_size - align;
+ poly_uint64 last_chunk_b = dr_b.access_size - align;
+
+ /* Get a boolean expression that is true when the step is negative. */
+ tree indicator = dr_direction_indicator (dr_a.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+
+ /* Get lengths in sizetype. */
+ tree seg_len_a
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
+ step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
+
+ /* Each access has the following pattern:
+
+   <- |seg_len| ->
+   <--- A: -ve step --->
+   +-----+-------+-----+-------+-----+
+   | n-1 | ..... |  0  | ..... | n-1 |
+   +-----+-------+-----+-------+-----+
+                 <--- B: +ve step --->
+                 <- |seg_len| ->
+                    |
+                    base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ We know that DR_B is a write. We also know (from checking that
+ DR_A and DR_B are well-ordered) that for each i in [0, n-1],
+ the write performed by access i of DR_B occurs after access numbers
+ j<=i of DR_A in both the original and the new code. Any write or
+ anti dependencies wrt those DR_A accesses are therefore maintained.
+
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
+
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move addr_a along by
+ one iteration:
+
+ addr_a' = addr_a + step
+
+ and check whether:
+
+ [addr_b, addr_b + last_chunk_b]
+
+ overlaps:
+
+ [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
+
+ where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
+
+ low_offset_a = +ve step ? 0 : seg_len_a - step
+ high_offset_a = +ve step ? seg_len_a - step : 0
+
+ This is equivalent to testing whether:
+
+ addr_a' + low_offset_a <= addr_b + last_chunk_b
+ && addr_b <= addr_a' + high_offset_a + last_chunk_a
+
+ Converting this into a single test, there is an overlap if:
+
+ 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
+
+ where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
+
+ If DR_A is performed, limit + |step| - last_chunk_b is known to be
+ less than the size of the object underlying DR_A. We also know
+ that last_chunk_b <= |step|; this is checked elsewhere if it isn't
+ guaranteed at compile time. There can therefore be no overflow if
+ "limit" is calculated in an unsigned type with pointer precision. */
+ tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
+ DR_OFFSET (dr_a.dr));
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+ tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
+ DR_OFFSET (dr_b.dr));
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+ /* Advance ADDR_A by one iteration and adjust the length to compensate. */
+ addr_a = fold_build_pointer_plus (addr_a, step);
+ tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
+ seg_len_a, step);
+ if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
+ seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
+
+ tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len_a_minus_step, size_zero_node);
+ if (!CONSTANT_CLASS_P (low_offset_a))
+ low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
+
+ /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
+ but it's usually more efficient to reuse the LOW_OFFSET_A result. */
+ tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
+ low_offset_a);
+
+ /* The amount added to addr_b - addr_a'. */
+ tree bias = fold_build2 (MINUS_EXPR, sizetype,
+ size_int (last_chunk_b), low_offset_a);
+
+ tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
+ limit = fold_build2 (PLUS_EXPR, sizetype, limit,
+ size_int (last_chunk_a + last_chunk_b));
+
+ tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
+ subject = fold_build2 (PLUS_EXPR, sizetype,
+ fold_convert (sizetype, subject), bias);
+
+ *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
return true;
}
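
/* A hedged example with hypothetical values: for a common step of +4,
   access sizes of 4 and alignment 4, last_chunk_a and last_chunk_b are
   0, low_offset_a is 0 and high_offset_a is seg_len_a - 4, so the
   no-alias condition above reduces to

     (sizetype) (addr_b - (addr_a + 4)) > seg_len_a - 4

   i.e. the first write of DR_B must not touch any later element of
   DR_A.  */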
-/* Given two data references and segment lengths described by DR_A and DR_B,
- create expression checking if the two addresses ranges intersect with
- each other:
+/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
+   every address ADDR accessed by D:
+
+     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
+
+ In this case, every element accessed by D is aligned to at least
+ ALIGN bytes.
- ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
- || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
+   If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
+
+     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
static void
-create_intersect_range_checks (struct loop *loop, tree *cond_expr,
- const dr_with_seg_len& dr_a,
- const dr_with_seg_len& dr_b)
+get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
+ tree *seg_max_out, HOST_WIDE_INT align)
{
+ /* Each access has the following pattern:
+
+   <- |seg_len| ->
+   <--- A: -ve step --->
+   +-----+-------+-----+-------+-----+
+   | n-1 | ..... |  0  | ..... | n-1 |
+   +-----+-------+-----+-------+-----+
+                 <--- B: +ve step --->
+                 <- |seg_len| ->
+                    |
+                    base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+ (This should be VF for a particular pair if we know that both steps
+ are the same, otherwise it will be the full number of scalar loop
+ iterations.)
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ If the access size is "access_size" bytes, the lowest addressed byte is:
+
+ base + (step < 0 ? seg_len : 0) [LB]
+
+ and the highest addressed byte is always below:
+
+ base + (step < 0 ? 0 : seg_len) + access_size [UB]
+
+ Thus:
+
+ LB <= ADDR < UB
+
+ If ALIGN is nonzero, all three values are aligned to at least ALIGN
+ bytes, so:
+
+ LB <= ADDR <= UB - ALIGN
+
+ where "- ALIGN" folds naturally with the "+ access_size" and often
+ cancels it out.
+
+ We don't try to simplify LB and UB beyond this (e.g. by using
+ MIN and MAX based on whether seg_len rather than the stride is
+ negative) because it is possible for the absolute size of the
+ segment to overflow the range of a ssize_t.
+
+ Keeping the pointer_plus outside of the cond_expr should allow
+ the cond_exprs to be shared with other alias checks. */
+ tree indicator = dr_direction_indicator (d.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
+ DR_OFFSET (d.dr));
+ addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
+ tree seg_len
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
+
+ tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len, size_zero_node);
+ tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ size_zero_node, seg_len);
+ max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
+ size_int (d.access_size - align));
+
+ *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
+ *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
+}
+
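As a standalone model of the [LB, UB) bounds derived above (not part of the patch; it assumes seg_len = (n - 1) * step for n accesses of access_size bytes each), the following program verifies that every accessed byte falls within the stated bounds for both step signs:

/* Standalone model of the LB/UB bounds in get_segment_min_max.  */
#include <assert.h>

int
main (void)
{
  long base = 1000;
  long access_size = 4;
  long n = 8;
  long steps[] = { 12, -12 };
  for (int k = 0; k < 2; ++k)
    {
      long step = steps[k];
      /* Distance from the first access to the start of the last.  */
      long seg_len = (n - 1) * step;
      long lb = base + (step < 0 ? seg_len : 0);
      long ub = base + (step < 0 ? 0 : seg_len) + access_size;
      for (long i = 0; i < n; ++i)
	for (long byte = 0; byte < access_size; ++byte)
	  {
	    long addr = base + i * step + byte;
	    assert (lb <= addr && addr < ub);
	  }
    }
  return 0;
}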
+/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
+ storing the condition in *COND_EXPR. The fallback is to generate
+ a test that the two accesses do not overlap:
+
+ end_a <= start_b || end_b <= start_a. */
+
+static void
+create_intersect_range_checks (class loop *loop, tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
*cond_expr = NULL_TREE;
- if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
+ if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
+ return;
+
+ if (create_ifn_alias_checks (cond_expr, alias_pair))
return;
- tree segment_length_a = dr_a.seg_len;
- tree segment_length_b = dr_b.seg_len;
- tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
- tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
- tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
-
- offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
- offset_a, DR_INIT (dr_a.dr));
- offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
- offset_b, DR_INIT (dr_b.dr));
- addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
- addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
-
- tree seg_a_min = addr_base_a;
- tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
- /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
- bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
- [a, a+12) */
- if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+ if (create_waw_or_war_checks (cond_expr, alias_pair))
+ return;
+
+ unsigned HOST_WIDE_INT min_align;
+ tree_code cmp_code;
+ /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
+ are equivalent. This is just an optimization heuristic. */
+ if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
+ && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
- seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
- seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+ /* In this case adding access_size to seg_len is likely to give
+ a simple X * step, where X is either the number of scalar
+ iterations or the vectorization factor. We're better off
+ keeping that, rather than subtracting an alignment from it.
+
+ In this case the maximum values are exclusive and so there is
+ no alias if the maximum of one segment equals the minimum
+ of another. */
+ min_align = 0;
+ cmp_code = LE_EXPR;
}
-
- tree seg_b_min = addr_base_b;
- tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
- if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+ else
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
- seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
- seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+ /* Calculate the minimum alignment shared by all four pointers,
+ then arrange for this alignment to be subtracted from the
+ exclusive maximum values to get inclusive maximum values.
+ This "- min_align" is cumulative with a "+ access_size"
+ in the calculation of the maximum values. In the best
+ (and common) case, the two cancel each other out, leaving
+ us with an inclusive bound based only on seg_len. In the
+ worst case we're simply adding a smaller number than before.
+
+ Because the maximum values are inclusive, there is an alias
+ if the maximum value of one segment is equal to the minimum
+ value of the other. */
+ min_align = MIN (dr_a.align, dr_b.align);
+ cmp_code = LT_EXPR;
}
+
+ tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
+ get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
+ get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
+
*cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
- fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+ fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
+ fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
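The LE_EXPR/LT_EXPR distinction above can be checked with a standalone sketch (not part of the patch): with exclusive maxima, segments that merely touch at a boundary are independent, while with inclusive maxima the boundary address is itself accessed, so the comparison must be strict.

#include <assert.h>

/* Segments [min, max) -- exclusive upper bounds (cmp_code LE_EXPR).  */
static int
independent_exclusive (long min_a, long max_a, long min_b, long max_b)
{
  return max_a <= min_b || max_b <= min_a;
}

/* Segments [min, max] -- inclusive upper bounds (cmp_code LT_EXPR).  */
static int
independent_inclusive (long min_a, long max_a, long min_b, long max_b)
{
  return max_a < min_b || max_b < min_a;
}

int
main (void)
{
  /* [0, 8) and [8, 16) do not overlap; the exclusive test agrees.  */
  assert (independent_exclusive (0, 8, 8, 16));
  /* [0, 8] and [8, 16] share address 8, so the inclusive test must
     report a possible overlap.  */
  assert (!independent_inclusive (0, 8, 8, 16));
  assert (independent_inclusive (0, 7, 8, 16));
  return 0;
}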
/* Create a conditional expression that represents the run-time checks for
that controls which version of the loop gets executed at runtime. */
void
-create_runtime_alias_checks (struct loop *loop,
+create_runtime_alias_checks (class loop *loop,
vec<dr_with_seg_len_pair_t> *alias_pairs,
tree * cond_expr)
{
tree part_cond_expr;
- for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
+ fold_defer_overflow_warnings ();
+ dr_with_seg_len_pair_t *alias_pair;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
{
- const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
- const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
-
+ gcc_assert (alias_pair->flags);
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "create runtime check for data references ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "create runtime check for data references %T and %T\n",
+ DR_REF (alias_pair->first.dr),
+ DR_REF (alias_pair->second.dr));
/* Create a condition expression for each pair of data references. */
- create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
+ create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
*cond_expr, part_cond_expr);
else
*cond_expr = part_cond_expr;
}
+ fold_undefer_and_ignore_overflow_warnings ();
}
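A hypothetical caller (not from the patch) would use the combined condition to version the loop. The sketch below models that, with a simple non-overlap test standing in for the generated *COND_EXPR; the function name "process" is made up for illustration.

#include <stdint.h>

void
process (int *a, int *b, int n)
{
  /* Stand-in for the runtime test built by create_runtime_alias_checks;
     casts avoid undefined cross-object pointer comparisons.  */
  uintptr_t end_a = (uintptr_t) (a + n);
  uintptr_t end_b = (uintptr_t) (b + n);
  if (end_a <= (uintptr_t) b || end_b <= (uintptr_t) a)
    {
      /* Aliasing disproved at runtime: run the "vectorized" version
	 (shown here as a plain loop).  */
      for (int i = 0; i < n; ++i)
	a[i] += b[i];
    }
  else
    {
      /* Possible overlap: fall back to the original scalar loop.  */
      for (int i = 0; i < n; ++i)
	a[i] += b[i];
    }
}

int
main (void)
{
  int x[8] = { 0 }, y[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
  process (x, y, 8);
  return x[0] == 1 ? 0 : 1;
}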
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
/* Returns true if the address of OBJ is invariant in LOOP. */
static bool
-object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
+object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
while (handled_component_p (obj))
{
if (TREE_CODE (obj) == ARRAY_REF)
{
- /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
- need to check the stride and the lower bound of the reference. */
- if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
- loop->num)
- || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
- loop->num))
- return false;
+ for (int i = 1; i < 4; ++i)
+ if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
+ loop->num))
+ return false;
}
else if (TREE_CODE (obj) == COMPONENT_REF)
{
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
- bool loop_nest)
+ class loop *loop_nest)
{
tree addr_a = DR_BASE_OBJECT (a);
tree addr_b = DR_BASE_OBJECT (b);
if (!loop_nest)
{
aff_tree off1, off2;
- widest_int size1, size2;
+ poly_widest_int size1, size2;
get_inner_reference_aff (DR_REF (a), &off1, &size1);
get_inner_reference_aff (DR_REF (b), &off2, &size2);
aff_combination_scale (&off1, -1);
if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
&& (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
+ /* For cross-iteration dependences the cliques must be valid for the
+ whole loop, not just individual iterations. */
+ && (!loop_nest
+ || MR_DEPENDENCE_CLIQUE (addr_a) == 1
+ || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
&& MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
&& MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
return false;
return refs_may_alias_p (addr_a, addr_b);
}
+/* REF_A and REF_B both satisfy access_fn_component_p. Return true
+ if it is meaningful to compare their associated access functions
+ when checking for dependencies. */
+
+static bool
+access_fn_components_comparable_p (tree ref_a, tree ref_b)
+{
+ /* Allow pairs of component refs from the following sets:
+
+ { REALPART_EXPR, IMAGPART_EXPR }
+ { COMPONENT_REF }
+ { ARRAY_REF }. */
+ tree_code code_a = TREE_CODE (ref_a);
+ tree_code code_b = TREE_CODE (ref_b);
+ if (code_a == IMAGPART_EXPR)
+ code_a = REALPART_EXPR;
+ if (code_b == IMAGPART_EXPR)
+ code_b = REALPART_EXPR;
+ if (code_a != code_b)
+ return false;
+
+ if (TREE_CODE (ref_a) == COMPONENT_REF)
+ /* ??? We cannot simply use the type of operand #0 of the refs here as
+ the Fortran compiler smuggles type punning into COMPONENT_REFs.
+ Use the DECL_CONTEXT of the FIELD_DECLs instead. */
+ return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
+ == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
+
+ return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
+ TREE_TYPE (TREE_OPERAND (ref_b, 0)));
+}
+
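As a concrete illustration with hypothetical types (not from the patch, and using the GNU __real__/__imag__ extensions), the accesses below produce component pairs that the function would consider comparable:

#include <complex.h>

struct bar { int c; double _Complex f; };
struct foo { struct bar s; };

int
main (void)
{
  struct foo a[4], b[4];
  /* For accesses a[1].s.c and b[2].s.c, the trailing ".c" pair is
     comparable because both FIELD_DECLs have DECL_CONTEXT struct bar;
     the ".s" pair (struct foo) and the array-index pair are likewise
     comparable.  */
  a[1].s.c = 1;
  b[2].s.c = a[1].s.c;
  /* REALPART_EXPR and IMAGPART_EXPR count as the same component kind,
     so __real__ and __imag__ accesses to f pair up too.  */
  a[1].s.f = 1.0 + 2.0 * I;
  double r = __real__ a[1].s.f;
  double im = __imag__ a[1].s.f;
  return (b[2].s.c == 1 && r == 1.0 && im == 2.0) ? 0 : 1;
}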
/* Initialize a data dependence relation between data accesses A and
B. NB_LOOPS is the number of loops surrounding the references: the
size of the classic distance/direction vectors. */
struct data_dependence_relation *res;
unsigned int i;
- res = XNEW (struct data_dependence_relation);
+ res = XCNEW (struct data_dependence_relation);
DDR_A (res) = a;
DDR_B (res) = b;
DDR_LOOP_NEST (res).create (0);
- DDR_REVERSED_P (res) = false;
DDR_SUBSCRIPTS (res).create (0);
DDR_DIR_VECTS (res).create (0);
DDR_DIST_VECTS (res).create (0);
}
/* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b, loop_nest.exists ()))
+ if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
{
DDR_ARE_DEPENDENT (res) = chrec_known;
return res;
}
- /* The case where the references are exactly the same. */
- if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
+ unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
+ unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
+ if (num_dimensions_a == 0 || num_dimensions_b == 0)
{
- if ((loop_nest.exists ()
- && !object_address_invariant_in_loop_p (loop_nest[0],
- DR_BASE_OBJECT (a)))
- || DR_NUM_DIMENSIONS (a) == 0)
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
+ }
+
+ /* For unconstrained bases, the root (highest-indexed) subscript
+ describes a variation in the base of the original DR_REF rather
+ than a component access. We have no type that accurately describes
+ the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
+ applying this subscript) so limit the search to the last real
+ component access.
+
+ E.g. for:
+
+ void
+ f (int a[][8], int b[][8])
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
- return res;
+ for (int i = 0; i < 8; ++i)
+ a[i * 2][0] = b[i][0];
+ }
+
+ the a and b accesses have a single ARRAY_REF component reference [0]
+ but have two subscripts. */
+ if (DR_UNCONSTRAINED_BASE (a))
+ num_dimensions_a -= 1;
+ if (DR_UNCONSTRAINED_BASE (b))
+ num_dimensions_b -= 1;
+
+ /* These structures describe sequences of component references in
+ DR_REF (A) and DR_REF (B). Each component reference is tied to a
+ specific access function. */
+ struct {
+ /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
+ DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
+ indices. In C notation, these are the indices of the rightmost
+ component references; e.g. for a sequence .b.c.d, the start
+ index is for .d. */
+ unsigned int start_a;
+ unsigned int start_b;
+
+ /* The sequence contains LENGTH consecutive access functions from
+ each DR. */
+ unsigned int length;
+
+ /* The enclosing objects for the A and B sequences respectively,
+ i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
+ and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
+ tree object_a;
+ tree object_b;
+ } full_seq = {}, struct_seq = {};
+
+ /* Before each iteration of the loop:
+
+ - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
+ - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
+ unsigned int index_a = 0;
+ unsigned int index_b = 0;
+ tree ref_a = DR_REF (a);
+ tree ref_b = DR_REF (b);
+
+ /* Now walk the component references from the final DR_REFs back up to
+ the enclosing base objects. Each component reference corresponds
+ to one access function in the DR, with access function 0 being for
+ the final DR_REF and the highest-indexed access function being the
+ one that is applied to the base of the DR.
+
+ Look for a sequence of component references whose access functions
+ are comparable (see access_fn_components_comparable_p). If more
+ than one such sequence exists, pick the one nearest the base
+ (which is the leftmost sequence in C notation). Store this sequence
+ in FULL_SEQ.
+
+ For example, if we have:
+
+ struct foo { struct bar s; ... } (*a)[10], (*b)[10];
+
+ A: a[0][i].s.c.d
+ B: __real b[0][i].s.e[i].f
+
+ (where d is the same type as the real component of f) then the access
+ functions would be:
+
+ 0 1 2 3
+ A: .d .c .s [i]
+
+ 0 1 2 3 4 5
+ B: __real .f [i] .e .s [i]
+
+ The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
+ and [i] is an ARRAY_REF. However, the A1/B3 column contains two
+ COMPONENT_REF accesses for struct bar, so is comparable. Likewise
+ the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
+ so is comparable. The A3/B5 column contains two ARRAY_REFs that
+ index foo[10] arrays, so is again comparable. The sequence is
+ therefore:
+
+ A: [1, 3] (i.e. [i].s.c)
+ B: [3, 5] (i.e. [i].s.e)
+
+ Also look for sequences of component references whose access
+ functions are comparable and whose enclosing objects have the same
+ RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
+ example, STRUCT_SEQ would be:
+
+ A: [1, 2] (i.e. s.c)
+ B: [3, 4] (i.e. s.e) */
+ while (index_a < num_dimensions_a && index_b < num_dimensions_b)
+ {
+ /* REF_A and REF_B must be one of the component access types
+ allowed by dr_analyze_indices. */
+ gcc_checking_assert (access_fn_component_p (ref_a));
+ gcc_checking_assert (access_fn_component_p (ref_b));
+
+ /* Get the immediately-enclosing objects for REF_A and REF_B,
+ i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
+ and DR_ACCESS_FN (B, INDEX_B). */
+ tree object_a = TREE_OPERAND (ref_a, 0);
+ tree object_b = TREE_OPERAND (ref_b, 0);
+
+ tree type_a = TREE_TYPE (object_a);
+ tree type_b = TREE_TYPE (object_b);
+ if (access_fn_components_comparable_p (ref_a, ref_b))
+ {
+ /* This pair of component accesses is comparable for dependence
+ analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
+ DR_ACCESS_FN (B, INDEX_B) in the sequence. */
+ if (full_seq.start_a + full_seq.length != index_a
+ || full_seq.start_b + full_seq.length != index_b)
+ {
+ /* The accesses don't extend the current sequence,
+ so start a new one here. */
+ full_seq.start_a = index_a;
+ full_seq.start_b = index_b;
+ full_seq.length = 0;
+ }
+
+ /* Add this pair of references to the sequence. */
+ full_seq.length += 1;
+ full_seq.object_a = object_a;
+ full_seq.object_b = object_b;
+
+ /* If the enclosing objects are structures (and thus have the
+ same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
+ if (TREE_CODE (type_a) == RECORD_TYPE)
+ struct_seq = full_seq;
+
+ /* Move to the next containing reference for both A and B. */
+ ref_a = object_a;
+ ref_b = object_b;
+ index_a += 1;
+ index_b += 1;
+ continue;
+ }
+
+ /* The accesses are not comparable at this level. Try to approach
+ equal type sizes by advancing the reference with the smaller type
+ (punting if either size is unknown). */
+ if (!COMPLETE_TYPE_P (type_a)
+ || !COMPLETE_TYPE_P (type_b)
+ || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
+ || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
+ break;
+
+ unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
+ unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
+ if (size_a <= size_b)
+ {
+ index_a += 1;
+ ref_a = object_a;
+ }
+ if (size_b <= size_a)
+ {
+ index_b += 1;
+ ref_b = object_b;
}
- DDR_AFFINE_P (res) = true;
- DDR_ARE_DEPENDENT (res) = NULL_TREE;
- DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
- DDR_LOOP_NEST (res) = loop_nest;
- DDR_INNER_LOOP (res) = 0;
- DDR_SELF_REFERENCE (res) = true;
- for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
- {
- struct subscript *subscript;
-
- subscript = XNEW (struct subscript);
- SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
- SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
- SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
- SUB_DISTANCE (subscript) = chrec_dont_know;
- DDR_SUBSCRIPTS (res).safe_push (subscript);
- }
- return res;
}
- /* If the references do not access the same object, we do not know
- whether they alias or not. We do not care about TBAA or alignment
- info so we can use OEP_ADDRESS_OF to avoid false negatives.
- But the accesses have to use compatible types as otherwise the
- built indices would not match. */
- if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF)
- || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
- TREE_TYPE (DR_BASE_OBJECT (b))))
+ /* See whether FULL_SEQ ends at the base and whether the two bases
+ are equal. We do not care about TBAA or alignment info so we can
+ use OEP_ADDRESS_OF to avoid false negatives. */
+ tree base_a = DR_BASE_OBJECT (a);
+ tree base_b = DR_BASE_OBJECT (b);
+ bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
+ && full_seq.start_b + full_seq.length == num_dimensions_b
+ && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
+ && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
+ && types_compatible_p (TREE_TYPE (base_a),
+ TREE_TYPE (base_b))
+ && (!loop_nest.exists ()
+ || (object_address_invariant_in_loop_p
+ (loop_nest[0], base_a))));
+
+ /* If the bases are the same, we can include the base variation too.
+ E.g. the b accesses in:
+
+ for (int i = 0; i < n; ++i)
+ b[i + 4][0] = b[i][0];
+
+ have a definite dependence distance of 4, while for:
+
+ for (int i = 0; i < n; ++i)
+ a[i + 4][0] = b[i][0];
+
+ the dependence distance depends on the gap between a and b.
+
+ If the bases are different then we can only rely on the sequence
+ rooted at a structure access, since arrays are allowed to overlap
+ arbitrarily and change shape arbitrarily. E.g. we treat this as
+ valid code:
+
+ int a[256];
+ ...
+ ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
+
+ where two lvalues with the same int[4][3] type overlap, and where
+ both lvalues are distinct from the object's declared type. */
+ if (same_base_p)
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
- return res;
+ if (DR_UNCONSTRAINED_BASE (a))
+ full_seq.length += 1;
}
+ else
+ full_seq = struct_seq;
- /* If the base of the object is not invariant in the loop nest, we cannot
- analyze it. TODO -- in fact, it would suffice to record that there may
- be arbitrary dependences in the loops where the base object varies. */
- if ((loop_nest.exists ()
- && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT (a)))
- || DR_NUM_DIMENSIONS (a) == 0)
+ /* Punt if we didn't find a suitable sequence. */
+ if (full_seq.length == 0)
{
DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;
}
- /* If the number of dimensions of the access to not agree we can have
- a pointer access to a component of the array element type and an
- array access while the base-objects are still the same. Punt. */
- if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
+ if (!same_base_p)
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
- return res;
+ /* Partial overlap is possible for different bases when strict aliasing
+ is not in effect. It's also possible if either base involves a union
+ access; e.g. for:
+
+ struct s1 { int a[2]; };
+ struct s2 { struct s1 b; int c; };
+ struct s3 { int d; struct s1 e; };
+ union u { struct s2 f; struct s3 g; } *p, *q;
+
+ the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
+ "p->g.e" (base "p->g") and might partially overlap the s1 at
+ "q->g.e" (base "q->g"). */
+ if (!flag_strict_aliasing
+ || ref_contains_union_access_p (full_seq.object_a)
+ || ref_contains_union_access_p (full_seq.object_b))
+ {
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
+ }
+
+ DDR_COULD_BE_INDEPENDENT_P (res) = true;
+ if (!loop_nest.exists ()
+ || (object_address_invariant_in_loop_p (loop_nest[0],
+ full_seq.object_a)
+ && object_address_invariant_in_loop_p (loop_nest[0],
+ full_seq.object_b)))
+ {
+ DDR_OBJECT_A (res) = full_seq.object_a;
+ DDR_OBJECT_B (res) = full_seq.object_b;
+ }
}
DDR_AFFINE_P (res) = true;
DDR_ARE_DEPENDENT (res) = NULL_TREE;
- DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
+ DDR_SUBSCRIPTS (res).create (full_seq.length);
DDR_LOOP_NEST (res) = loop_nest;
- DDR_INNER_LOOP (res) = 0;
DDR_SELF_REFERENCE (res) = false;
- for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
+ for (i = 0; i < full_seq.length; ++i)
{
struct subscript *subscript;
subscript = XNEW (struct subscript);
+ SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
+ SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
conflict_function *ret = XCNEW (conflict_function);
va_list ap;
- gcc_assert (0 < n && n <= MAX_DIM);
+ gcc_assert (n > 0 && n <= MAX_DIM);
va_start (ap, n);
ret->n = n;
chrec_dont_know. */
static tree
-max_stmt_executions_tree (struct loop *loop)
+max_stmt_executions_tree (class loop *loop)
{
widest_int nit;
{
if (value0 == false)
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
{
HOST_WIDE_INT numiter;
- struct loop *loop = get_chrec_loop (chrec_b);
+ class loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
tmp = fold_build2 (EXACT_DIV_EXPR, type,
}
else
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
{
HOST_WIDE_INT numiter;
- struct loop *loop = get_chrec_loop (chrec_b);
+ class loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
mat[i][j] = (i == j) ? 1 : 0;
}
-/* Return the first nonzero element of vector VEC1 between START and N.
- We must have START <= N. Returns N if VEC1 is the zero vector. */
+/* Return the index of the first nonzero element of vector VEC1 between
+ START and N. We must have START <= N.
+ Returns N if VEC1 is the zero vector. */
static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
R2 = R2 + CONST1 * R1. */
static void
-lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
+ lambda_int const1)
{
int i;
static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
- int size, int const1)
+ int size, lambda_int const1)
{
int i;
{
while (S[i][j] != 0)
{
- int sigma, factor, a, b;
+ lambda_int sigma, factor, a, b;
a = S[i-1][j];
b = S[i][j];
sigma = (a * b < 0) ? -1: 1;
- a = abs (a);
- b = abs (b);
+ a = abs_hwi (a);
+ b = abs_hwi (b);
factor = sigma * (a / b);
lambda_matrix_row_add (S, n, i, i-1, -factor);
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
if (niter > 0)
{
- HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
- FLOOR_DIV (niter_b - j0, j1));
- HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
-
/* If the overlap occurs outside of the bounds of the
loop, there is no dependence. */
if (x1 >= niter_a || y1 >= niter_b)
*last_conflicts = integer_zero_node;
goto end_analyze_subs_aa;
}
+
+ /* The maximum number of statement executions can get quite large;
+ avoid overflow by using wide ints here. */
+ widest_int tau2
+ = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
+ wi::sdiv_floor (wi::sub (niter_b, j0), j1));
+ widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
+ if (wi::min_precision (last_conflict, SIGNED)
+ <= TYPE_PRECISION (integer_type_node))
+ *last_conflicts
+ = build_int_cst (integer_type_node,
+ last_conflict.to_shwi ());
else
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ *last_conflicts = chrec_dont_know;
}
else
*last_conflicts = chrec_dont_know;
conflict_function **overlaps_a,
conflict_function **overlaps_b,
tree *last_conflicts,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
tree type, difference;
}
else if (evolution_function_is_constant_p (difference)
- /* For the moment, the following is verified:
- evolution_function_is_affine_multivariate_p (chrec_a,
- loop_nest->num) */
+ && evolution_function_is_affine_multivariate_p (chrec_a,
+ loop_nest->num)
&& !gcd_of_steps_may_divide_p (chrec_a, difference))
{
/* testsuite/.../ssa-chrec-33.c
dependence_stats.num_miv_independent++;
}
- else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
- && !chrec_contains_symbols (chrec_a)
- && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
- && !chrec_contains_symbols (chrec_b))
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
+ && !chrec_contains_symbols (chrec_a, loop_nest)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
+ && !chrec_contains_symbols (chrec_b, loop_nest))
{
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
tree chrec_b,
conflict_function **overlap_iterations_a,
conflict_function **overlap_iterations_b,
- tree *last_conflicts, struct loop *loop_nest)
+ tree *last_conflicts, class loop *loop_nest)
{
unsigned int lnn = loop_nest->num;
}
/* Return false when we fail to represent the data dependence as a
- distance vector. INIT_B is set to true when a component has been
+ distance vector. A_INDEX is the index of the first reference
+ (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
+ second reference. INIT_B is set to true when a component has been
added to the distance vector DIST_V. INDEX_CARRY is then set to
the index in DIST_V that carries the dependence. */
static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
- struct data_reference *ddr_a,
- struct data_reference *ddr_b,
+ unsigned int a_index, unsigned int b_index,
lambda_vector dist_v, bool *init_b,
int *index_carry)
{
unsigned i;
lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
+ class loop *loop = DDR_LOOP_NEST (ddr)[0];
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
{
return false;
}
- access_fn_a = DR_ACCESS_FN (ddr_a, i);
- access_fn_b = DR_ACCESS_FN (ddr_b, i);
+ access_fn_a = SUB_ACCESS_FN (subscript, a_index);
+ access_fn_b = SUB_ACCESS_FN (subscript, b_index);
if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
&& TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
return false;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in a loop nest nested inside that loop,
+ there can be more access functions than loops in the nest.
+ Skip access functions of loops that are not in the nest.
+
+ See PR89725 for more information. */
+ if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
+ continue;
+
dist = int_cst_value (SUB_DISTANCE (subscript));
index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
*index_carry = MIN (index, *index_carry);
constant_access_functions (const struct data_dependence_relation *ddr)
{
unsigned i;
+ subscript *sub;
- for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
- if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
- || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
+ FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
+ if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
+ || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
return false;
return true;
lambda_vector dist_v;
unsigned i;
int index_carry = DDR_NB_LOOPS (ddr);
+ subscript *sub;
+ class loop *loop = DDR_LOOP_NEST (ddr)[0];
- for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
+ FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
{
- tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
+ tree access_fun = SUB_ACCESS_FN (sub, 0);
if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
{
- if (!evolution_function_is_univariate_p (access_fun))
+ if (!evolution_function_is_univariate_p (access_fun, loop->num))
{
if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
{
return;
}
- access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
+ access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
add_multivariate_self_dist (ddr, access_fun);
return;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in a loop nest nested inside that loop,
+ there can be more access functions than loops in the nest.
+ Skip access functions of loops that are not in the nest.
+
+ See PR89725 for more information. */
+ if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
+ loop))
+ continue;
+
index_carry = MIN (index_carry,
index_in_loop_nest (CHREC_VARIABLE (access_fun),
DDR_LOOP_NEST (ddr)));
{
lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- dist_v[DDR_INNER_LOOP (ddr)] = 1;
+ dist_v[0] = 1;
save_dist_v (ddr, dist_v);
}
}
}
+/* Return true when the DDR contains two data references that have the
+ same access functions. */
+
+static inline bool
+same_access_functions (const struct data_dependence_relation *ddr)
+{
+ unsigned i;
+ subscript *sub;
+
+ FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
+ if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
+ SUB_ACCESS_FN (sub, 1)))
+ return false;
+
+ return true;
+}
+
/* Compute the classic per-loop distance vector. DDR is the data
dependence relation to build a vector from. Return false when we
fail to represent the data dependence as a distance vector. */
static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
bool init_b = false;
int index_carry = DDR_NB_LOOPS (ddr);
}
dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
- dist_v, &init_b, &index_carry))
+ if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
return false;
/* Save the distance vector if we initialized one. */
if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
{
lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- loop_nest))
+ if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
return false;
compute_subscript_distance (ddr);
- if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- save_v, &init_b, &index_carry))
+ if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
+ &index_carry))
return false;
save_dist_v (ddr, save_v);
DDR_REVERSED_P (ddr) = true;
{
lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
- DDR_A (ddr), loop_nest))
+ if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
return false;
compute_subscript_distance (ddr);
- if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- opposite_v, &init_b,
+ if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
&index_carry))
return false;
}
}
-/* Helper function. Returns true when there is a dependence between
- data references DRA and DRB. */
+/* Helper function. Returns true when there is a dependence between the
+ data references. A_INDEX is the index of the first reference (0 for
+ DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
- struct data_reference *dra,
- struct data_reference *drb,
- struct loop *loop_nest)
+ unsigned int a_index, unsigned int b_index,
+ class loop *loop_nest)
{
unsigned int i;
tree last_conflicts;
{
conflict_function *overlaps_a, *overlaps_b;
- analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
- DR_ACCESS_FN (drb, i),
+ analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
+ SUB_ACCESS_FN (subscript, b_index),
&overlaps_a, &overlaps_b,
&last_conflicts, loop_nest);
static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
- if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
+ if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
dependence_stats.num_dependence_dependent++;
compute_subscript_distance (ddr);
static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
- const struct loop *loop_nest)
+ const class loop *loop_nest)
{
unsigned int i;
vec<tree> fns = DR_ACCESS_FNS (a);
void
compute_affine_dependence (struct data_dependence_relation *ddr,
- struct loop *loop_nest)
+ class loop *loop_nest)
{
struct data_reference *dra = DDR_A (ddr);
struct data_reference *drb = DDR_B (ddr);
unsigned int i, j;
if ((int) datarefs.length ()
- > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
+ > param_loop_max_datarefs_for_datadeps)
{
struct data_dependence_relation *ddr;
/* True if the memory reference is read. */
bool is_read;
+
+ /* True if the data reference is conditional within the containing
+ statement, i.e. if it might not occur even when the statement
+ is executed and runs to completion. */
+ bool is_conditional_in_stmt;
};
{
case IFN_GOMP_SIMD_LANE:
{
- struct loop *loop = gimple_bb (stmt)->loop_father;
+ class loop *loop = gimple_bb (stmt)->loop_father;
tree uid = gimple_call_arg (stmt, 0);
gcc_assert (TREE_CODE (uid) == SSA_NAME);
if (loop == NULL
{
ref.ref = op1;
ref.is_read = true;
+ ref.is_conditional_in_stmt = false;
references->safe_push (ref);
}
}
type = TREE_TYPE (gimple_call_arg (stmt, 3));
if (TYPE_ALIGN (type) != align)
type = build_aligned_type (type, align);
+ ref.is_conditional_in_stmt = true;
ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
ptr);
references->safe_push (ref);
{
ref.ref = op1;
ref.is_read = true;
+ ref.is_conditional_in_stmt = false;
references->safe_push (ref);
}
}
{
ref.ref = op0;
ref.is_read = false;
+ ref.is_conditional_in_stmt = false;
references->safe_push (ref);
}
return clobbers_memory;
}
}
free (bbs);
-
- if (loop->inner)
- {
- loop = loop->inner;
- while (loop)
- {
- if (loop_nest_has_data_refs (loop))
- return true;
- loop = loop->next;
- }
- }
return false;
}
reference, returns false, otherwise returns true. NEST is the outermost
loop of the loop nest in which the references should be analyzed. */
-bool
-find_data_references_in_stmt (struct loop *nest, gimple *stmt,
+opt_result
+find_data_references_in_stmt (class loop *nest, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
auto_vec<data_ref_loc, 2> references;
data_ref_loc *ref;
- bool ret = true;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
- return false;
+ return opt_result::failure_at (stmt, "statement clobbers memory: %G",
+ stmt);
FOR_EACH_VEC_ELT (references, i, ref)
{
- dr = create_data_ref (nest, loop_containing_stmt (stmt),
- ref->ref, stmt, ref->is_read);
+ dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
+ loop_containing_stmt (stmt), ref->ref,
+ stmt, ref->is_read, ref->is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
}
- return ret;
+ return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS. If there is an
should be analyzed. */
bool
-graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
+graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
FOR_EACH_VEC_ELT (references, i, ref)
{
- dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
+ dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
+ ref->is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
}
difficult case, returns NULL_TREE otherwise. */
tree
-find_data_references_in_bb (struct loop *loop, basic_block bb,
+find_data_references_in_bb (class loop *loop, basic_block bb,
vec<data_reference_p> *datarefs)
{
gimple_stmt_iterator bsi;
arithmetic as if they were array accesses, etc. */
tree
-find_data_references_in_loop (struct loop *loop,
+find_data_references_in_loop (class loop *loop,
vec<data_reference_p> *datarefs)
{
basic_block bb, *bbs;
return NULL_TREE;
}
+/* Return the alignment in bytes that DRB is guaranteed to have at all
+ times. */
+
+unsigned int
+dr_alignment (innermost_loop_behavior *drb)
+{
+ /* Get the alignment of BASE_ADDRESS + INIT. */
+ unsigned int alignment = drb->base_alignment;
+ unsigned int misalignment = (drb->base_misalignment
+ + TREE_INT_CST_LOW (drb->init));
+ if (misalignment != 0)
+ alignment = MIN (alignment, misalignment & -misalignment);
+
+ /* Cap it to the alignment of OFFSET. */
+ if (!integer_zerop (drb->offset))
+ alignment = MIN (alignment, drb->offset_alignment);
+
+ /* Cap it to the alignment of STEP. */
+ if (!integer_zerop (drb->step))
+ alignment = MIN (alignment, drb->step_alignment);
+
+ return alignment;
+}
+
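The misalignment handling relies on X & -X extracting the lowest set bit of X, i.e. the largest power of two dividing X. A standalone sketch (not part of the patch) of that capping step:

#include <assert.h>

static unsigned int
alignment_after_misalignment (unsigned int base_alignment,
			      unsigned int misalignment)
{
  unsigned int alignment = base_alignment;
  if (misalignment != 0)
    {
      /* Largest power of two dividing MISALIGNMENT.  */
      unsigned int cap = misalignment & -misalignment;
      if (cap < alignment)
	alignment = cap;
    }
  return alignment;
}

int
main (void)
{
  /* A 16-byte-aligned base offset by 4 bytes is only 4-byte aligned.  */
  assert (alignment_after_misalignment (16, 4) == 4);
  /* An offset of 24 = 8 * 3 preserves 8-byte alignment.  */
  assert (alignment_after_misalignment (16, 24) == 8);
  /* Zero misalignment leaves the base alignment intact.  */
  assert (alignment_after_misalignment (16, 0) == 16);
  return 0;
}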
+/* If BASE is a pointer-typed SSA name, try to find the object that it
+ is based on. Return this object X on success and store the alignment
+ in bytes of BASE - &X in *ALIGNMENT_OUT. */
+
+static tree
+get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
+{
+ if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ gimple *def = SSA_NAME_DEF_STMT (base);
+ base = analyze_scalar_evolution (loop_containing_stmt (def), base);
+
+ /* Peel chrecs and record the minimum alignment preserved by
+ all steps. */
+ unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ while (TREE_CODE (base) == POLYNOMIAL_CHREC)
+ {
+ unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
+ alignment = MIN (alignment, step_alignment);
+ base = CHREC_LEFT (base);
+ }
+
+ /* Punt if the expression is too complicated to handle. */
+ if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ /* The only useful cases are those for which a dereference folds to something
+ other than an INDIRECT_REF. */
+ tree ref_type = TREE_TYPE (TREE_TYPE (base));
+ tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
+ if (!ref)
+ return NULL_TREE;
+
+ /* Analyze the base to which the steps we peeled were applied. */
+ poly_int64 bitsize, bitpos, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset;
+ base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ return NULL_TREE;
+
+ /* Restrict the alignment to that guaranteed by the offsets. */
+ unsigned int bytepos_alignment = known_alignment (bytepos);
+ if (bytepos_alignment != 0)
+ alignment = MIN (alignment, bytepos_alignment);
+ if (offset)
+ {
+ unsigned int offset_alignment = highest_pow2_factor (offset);
+ alignment = MIN (alignment, offset_alignment);
+ }
+
+ *alignment_out = alignment;
+ return base;
+}
+
+/* Return the object whose alignment would need to be changed in order
+ to increase the alignment of ADDR. Store the maximum achievable
+ alignment in *MAX_ALIGNMENT. */
+
+tree
+get_base_for_alignment (tree addr, unsigned int *max_alignment)
+{
+ tree base = get_base_for_alignment_1 (addr, max_alignment);
+ if (base)
+ return base;
+
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ return addr;
+}
+
/* Recursive helper function. */
static bool
-find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
+find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
/* Inner loops of the nest should not contain siblings. Example:
when there are two consecutive loops,
appear in the classic distance vector. */
bool
-find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
+find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
loop_nest->safe_push (loop);
if (loop->inner)
COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
bool
-compute_data_dependences_for_loop (struct loop *loop,
+compute_data_dependences_for_loop (class loop *loop,
bool compute_self_and_read_read_dependences,
vec<loop_p> *loop_nest,
vec<data_reference_p> *datarefs,
free_data_ref (dr);
datarefs.release ();
}
+
+/* Common routine implementing both dr_direction_indicator and
+ dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
+ to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
+ Return the step as the indicator otherwise. */
+
+static tree
+dr_step_indicator (struct data_reference *dr, int useful_min)
+{
+ tree step = DR_STEP (dr);
+ if (!step)
+ return NULL_TREE;
+ STRIP_NOPS (step);
+ /* Look for cases where the step is scaled by a positive constant
+ integer, which will often be the access size. If the multiplication
+ doesn't change the sign (due to overflow effects) then we can
+ test the unscaled value instead. */
+ if (TREE_CODE (step) == MULT_EXPR
+ && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
+ && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
+ {
+ tree factor = TREE_OPERAND (step, 1);
+ step = TREE_OPERAND (step, 0);
+
+ /* Strip widening and truncating conversions as well as nops. */
+ if (CONVERT_EXPR_P (step)
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
+ step = TREE_OPERAND (step, 0);
+ tree type = TREE_TYPE (step);
+
+ /* Get the range of step values that would not cause overflow. */
+ widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
+ / wi::to_widest (factor));
+ widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
+ / wi::to_widest (factor));
+
+ /* Get the range of values that the unconverted step actually has. */
+ wide_int step_min, step_max;
+ if (TREE_CODE (step) != SSA_NAME
+ || get_range_info (step, &step_min, &step_max) != VR_RANGE)
+ {
+ step_min = wi::to_wide (TYPE_MIN_VALUE (type));
+ step_max = wi::to_wide (TYPE_MAX_VALUE (type));
+ }
+
+ /* Check whether the unconverted step has an acceptable range. */
+ signop sgn = TYPE_SIGN (type);
+ if (wi::les_p (minv, widest_int::from (step_min, sgn))
+ && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
+ {
+ if (wi::ge_p (step_min, useful_min, sgn))
+ return ssize_int (useful_min);
+ else if (wi::lt_p (step_max, 0, sgn))
+ return ssize_int (-1);
+ else
+ return fold_convert (ssizetype, step);
+ }
+ }
+ return DR_STEP (dr);
+}
+
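The factor-stripping above depends on a simple sign-preservation fact: if FACTOR > 0 and the product FACTOR * X cannot overflow, the product has the same sign as X, so the unscaled value can be classified instead. A standalone check (not part of the patch):

#include <assert.h>

int
main (void)
{
  long factor = 4; /* e.g. the access size */
  for (long x = -1000; x <= 1000; ++x)
    {
      long step = factor * x; /* no overflow in this range */
      assert ((step < 0) == (x < 0));
      assert ((step == 0) == (x == 0));
    }
  return 0;
}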
+/* Return a value that is negative iff DR has a negative step. */
+
+tree
+dr_direction_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 0);
+}
+
+/* Return a value that is zero iff DR has a zero step. */
+
+tree
+dr_zero_step_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 1);
+}
+
+/* Return true if DR is known to have a nonnegative (but possibly zero)
+ step. */
+
+bool
+dr_known_forward_stride_p (struct data_reference *dr)
+{
+ tree indicator = dr_direction_indicator (dr);
+ tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ return neg_step_val && integer_zerop (neg_step_val);
+}