/* Data references and dependences detectors.
- Copyright (C) 2003-2020 Free Software Foundation, Inc.
+ Copyright (C) 2003-2021 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
+#include "vr-values.h"
+#include "range-op.h"
+#include "tree-ssa-loop-ivopts.h"
static struct datadep_stats
{
/* Returns true iff A divides B. */
static inline bool
-int_divides_p (int a, int b)
+int_divides_p (lambda_int a, lambda_int b)
{
return ((b % a) == 0);
}
static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
- unsigned int i;
- struct data_reference *dr;
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
+ for (data_reference *dr : datarefs)
dump_data_reference (file, dr);
}
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
int length)
{
- unsigned j;
- lambda_vector v;
-
- FOR_EACH_VEC_ELT (dir_vects, j, v)
+ for (lambda_vector v : dir_vects)
print_direction_vector (outf, v, length);
}
int i;
for (i = 0; i < n; i++)
- fprintf (outfile, "%3d ", (int)vector[i]);
+ fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
fprintf (outfile, "\n");
}
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
int length)
{
- unsigned j;
- lambda_vector v;
-
- FOR_EACH_VEC_ELT (dist_vects, j, v)
+ for (lambda_vector v : dist_vects)
print_lambda_vector (outf, v, length);
}
/* Dump function for a DATA_DEPENDENCE_RELATION structure. */
DEBUG_FUNCTION void
-dump_data_dependence_relation (FILE *outf,
- struct data_dependence_relation *ddr)
+dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
struct data_reference *dra, *drb;
/* Debug version. */
DEBUG_FUNCTION void
-debug_data_dependence_relation (struct data_dependence_relation *ddr)
+debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
dump_data_dependence_relation (stderr, ddr);
}
/* Dump into FILE all the dependence relations from DDRS. */
DEBUG_FUNCTION void
-dump_data_dependence_relations (FILE *file,
- vec<ddr_p> ddrs)
+dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
- unsigned int i;
- struct data_dependence_relation *ddr;
-
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
+ for (auto ddr : ddrs)
dump_data_dependence_relation (file, ddr);
}
DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
- unsigned int i, j;
- struct data_dependence_relation *ddr;
- lambda_vector v;
-
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
+ for (data_dependence_relation *ddr : ddrs)
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
{
- FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
+ for (lambda_vector v : DDR_DIST_VECTS (ddr))
{
fprintf (file, "DISTANCE_V (");
print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
fprintf (file, ")\n");
}
- FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
+ for (lambda_vector v : DDR_DIR_VECTS (ddr))
{
fprintf (file, "DIRECTION_V (");
print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
- unsigned int i;
- struct data_dependence_relation *ddr;
-
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
+ for (data_dependence_relation *ddr : ddrs)
dump_data_dependence_relation (file, ddr);
fprintf (file, "\n\n");
dump_ddrs (stderr, ddrs);
}
+/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
+ OP0 CODE OP1, where:
+
+ - OP0 CODE OP1 has integral type TYPE
+ - the range of OP0 is given by OP0_RANGE and
+ - the range of OP1 is given by OP1_RANGE.
+
+ Independently of RESULT_RANGE, try to compute:
+
+ DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
+ - (sizetype) (OP0 CODE OP1)
+
+ as a constant and subtract DELTA from the ssizetype constant in *OFF.
+ Return true on success, or false if DELTA is not known at compile time.
+
+ Truncation and sign changes are known to distribute over CODE, i.e.
+
+ (itype) (A CODE B) == (itype) A CODE (itype) B
+
+ for any integral type ITYPE whose precision is no greater than the
+   precision of A and B.
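+
+   As an illustrative example: if TYPE is a 16-bit unsigned type, CODE
+   is PLUS_EXPR, OP0_RANGE is [0xfff0, 0xfff0] and OP1_RANGE is
+   [0x20, 0x20], then (sizetype) OP0 + (sizetype) OP1 is 0x10010 while
+   (sizetype) (OP0 + OP1) wraps to 0x10, so DELTA is 0x10000 and we
+   subtract 0x10000 from *OFF. */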
+
+static bool
+compute_distributive_range (tree type, value_range &op0_range,
+ tree_code code, value_range &op1_range,
+ tree *off, value_range *result_range)
+{
+ gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
+ if (result_range)
+ {
+ range_operator *op = range_op_handler (code, type);
+ op->fold_range (*result_range, type, op0_range, op1_range);
+ }
+
+ /* The distributive property guarantees that if TYPE is no narrower
+ than SIZETYPE,
+
+ (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
+
+ and so we can treat DELTA as zero. */
+ if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
+ return true;
+
+ /* If overflow is undefined, we can assume that:
+
+ X == (ssizetype) OP0 CODE (ssizetype) OP1
+
+ is within the range of TYPE, i.e.:
+
+ X == (ssizetype) (TYPE) X
+
+ Distributing the (TYPE) truncation over X gives:
+
+ X == (ssizetype) (OP0 CODE OP1)
+
+ Casting both sides to sizetype and distributing the sizetype cast
+ over X gives:
+
+ (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
+
+ and so we can treat DELTA as zero. */
+ if (TYPE_OVERFLOW_UNDEFINED (type))
+ return true;
+
+ /* Compute the range of:
+
+ (ssizetype) OP0 CODE (ssizetype) OP1
+
+ The distributive property guarantees that this has the same bitpattern as:
+
+ (sizetype) OP0 CODE (sizetype) OP1
+
+ but its range is more conducive to analysis. */
+ range_cast (op0_range, ssizetype);
+ range_cast (op1_range, ssizetype);
+ value_range wide_range;
+ range_operator *op = range_op_handler (code, ssizetype);
+ bool saved_flag_wrapv = flag_wrapv;
+ flag_wrapv = 1;
+ op->fold_range (wide_range, ssizetype, op0_range, op1_range);
+ flag_wrapv = saved_flag_wrapv;
+ if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
+ return false;
+
+ wide_int lb = wide_range.lower_bound ();
+ wide_int ub = wide_range.upper_bound ();
+
+ /* Calculate the number of times that each end of the range overflows or
+ underflows TYPE. We can only calculate DELTA if the numbers match. */
+ unsigned int precision = TYPE_PRECISION (type);
+ if (!TYPE_UNSIGNED (type))
+ {
+ wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
+ lb -= type_min;
+ ub -= type_min;
+ }
+ wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
+ lb &= upper_bits;
+ ub &= upper_bits;
+ if (lb != ub)
+ return false;
+
+ /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
+ negative values indicating underflow. The low PRECISION bits of LB
+ are clear, so DELTA is therefore LB (== UB). */
+ *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
+ return true;
+}
+
+/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
+ given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
+   FROM_TYPE are integral types.
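+
+   For example (an illustrative case): if FROM_TYPE is unsigned char,
+   any TO_TYPE at least as wide satisfies this, since TO_TYPE can
+   represent every value OP can take; if instead TO_TYPE is unsigned
+   char and FROM_TYPE is int, the condition only holds when RANGE is
+   known to fit in [0, 255]. */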
+
+static bool
+nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
+{
+ gcc_assert (INTEGRAL_TYPE_P (to_type)
+ && INTEGRAL_TYPE_P (from_type)
+ && !TYPE_OVERFLOW_TRAPS (to_type)
+ && !TYPE_OVERFLOW_TRAPS (from_type));
+
+ /* Converting to something no narrower than sizetype and then to sizetype
+ is equivalent to converting directly to sizetype. */
+ if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
+ return true;
+
+ /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
+ if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
+ && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
+ return true;
+
+ /* For narrowing conversions, we could in principle test whether
+ the bits in FROM_TYPE but not in TO_TYPE have a fixed value
+ and apply a constant adjustment.
+
+ For other conversions (which involve a sign change) we could
+ check that the signs are always equal, and apply a constant
+ adjustment if the signs are negative.
+
+ However, both cases should be rare. */
+ return range_fits_type_p (&range, TYPE_PRECISION (to_type),
+ TYPE_SIGN (to_type));
+}
+
static void
-split_constant_offset (tree exp, tree *var, tree *off,
+split_constant_offset (tree exp, tree *var, tree *off,
+ value_range *result_range,
hash_map<tree, std::pair<tree, tree> > &cache,
unsigned *limit);
-/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
- (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
- constant of type ssizetype, and returns true. If we cannot do this
- with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
- is returned. */
+/* Helper function for split_constant_offset. If TYPE is a pointer type,
+ try to express OP0 CODE OP1 as:
+
+ POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+ where:
+
+ - *VAR has type TYPE
+ - *OFF is a constant of type ssizetype.
+
+ If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
+
+ *VAR + (sizetype) *OFF
+
+ where:
+
+ - *VAR has type sizetype
+ - *OFF is a constant of type ssizetype.
+
+ In both cases, OP0 CODE OP1 has type TYPE.
+
+ Return true on success. A false return value indicates that we can't
+ do better than set *OFF to zero.
+
+ When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
+ if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
+
+ CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
+ visited. LIMIT counts down the number of SSA names that we are
+   allowed to process before giving up.
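+
+   For instance (an illustrative case): if TYPE is a 32-bit signed type,
+   OP0 is the SSA name i_1, CODE is PLUS_EXPR and OP1 is the constant 4,
+   then (sizetype) (i_1 + 4) can be rewritten as (sizetype) i_1 + 4,
+   giving *VAR = (sizetype) i_1 and *OFF = 4; here it is the fact that
+   signed overflow is undefined that licenses distributing the
+   conversion over the addition. */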
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
- tree *var, tree *off,
+ tree *var, tree *off, value_range *result_range,
hash_map<tree, std::pair<tree, tree> > &cache,
unsigned *limit)
{
tree var0, var1;
tree off0, off1;
- enum tree_code ocode = code;
+ value_range op0_range, op1_range;
*var = NULL_TREE;
*off = NULL_TREE;
+ if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
+ return false;
+
switch (code)
{
case INTEGER_CST:
- *var = build_int_cst (type, 0);
+ *var = size_int (0);
*off = fold_convert (ssizetype, op0);
+ if (result_range)
+ result_range->set (op0, op0);
return true;
case POINTER_PLUS_EXPR:
- ocode = PLUS_EXPR;
- /* FALLTHROUGH */
+ split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
+ split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
+ *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
+ *off = size_binop (PLUS_EXPR, off0, off1);
+ return true;
+
case PLUS_EXPR:
case MINUS_EXPR:
- if (TREE_CODE (op1) == INTEGER_CST)
- {
- split_constant_offset (op0, &var0, &off0, cache, limit);
- *var = var0;
- *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
- return true;
- }
- split_constant_offset (op0, &var0, &off0, cache, limit);
- split_constant_offset (op1, &var1, &off1, cache, limit);
- *var = fold_build2 (code, type, var0, var1);
- *off = size_binop (ocode, off0, off1);
+ split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
+ split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
+ *off = size_binop (code, off0, off1);
+ if (!compute_distributive_range (type, op0_range, code, op1_range,
+ off, result_range))
+ return false;
+ *var = fold_build2 (code, sizetype, var0, var1);
return true;
case MULT_EXPR:
if (TREE_CODE (op1) != INTEGER_CST)
return false;
- split_constant_offset (op0, &var0, &off0, cache, limit);
- *var = fold_build2 (MULT_EXPR, type, var0, op1);
+ split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
+ op1_range.set (op1, op1);
*off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
+ if (!compute_distributive_range (type, op0_range, code, op1_range,
+ off, result_range))
+ return false;
+ *var = fold_build2 (MULT_EXPR, sizetype, var0,
+ fold_convert (sizetype, op1));
return true;
case ADDR_EXPR:
if (poffset)
{
- split_constant_offset (poffset, &poffset, &off1, cache, limit);
+ split_constant_offset (poffset, &poffset, &off1, nullptr,
+ cache, limit);
off0 = size_binop (PLUS_EXPR, off0, off1);
- if (POINTER_TYPE_P (TREE_TYPE (base)))
- base = fold_build_pointer_plus (base, poffset);
- else
- base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
- fold_convert (TREE_TYPE (base), poffset));
+ base = fold_build_pointer_plus (base, poffset);
}
var0 = fold_convert (type, base);
return false;
*var = e.first;
*off = e.second;
+ /* The caller sets the range in this case. */
return true;
}
e = std::make_pair (op0, ssize_int (0));
var1 = gimple_assign_rhs2 (def_stmt);
bool res = split_constant_offset_1 (type, var0, subcode, var1,
- var, off, cache, limit);
+ var, off, nullptr, cache, limit);
if (res && use_cache)
*cache.get (op0) = std::make_pair (*var, *off);
+ /* The caller sets the range in this case. */
return res;
}
CASE_CONVERT:
{
- /* We must not introduce undefined overflow, and we must not change
- the value. Hence we're okay if the inner type doesn't overflow
- to start with (pointer or signed), the outer type also is an
- integer or pointer and the outer precision is at least as large
- as the inner. */
+ /* We can only handle the following conversions:
+
+ - Conversions from one pointer type to another pointer type.
+
+ - Conversions from one non-trapping integral type to another
+ non-trapping integral type. In this case, the recursive
+ call makes sure that:
+
+ (sizetype) OP0
+
+ can be expressed as a sizetype operation involving VAR and OFF,
+ and all we need to do is check whether:
+
+ (sizetype) OP0 == (sizetype) (TYPE) OP0
+
+	 - Conversions from a non-trapping sizetype-sized integral type to
+ a like-sized pointer type. In this case, the recursive call
+ makes sure that:
+
+ (sizetype) OP0 == *VAR + (sizetype) *OFF
+
+ and we can convert that to:
+
+ POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
+
+ - Conversions from a sizetype-sized pointer type to a like-sized
+ non-trapping integral type. In this case, the recursive call
+ makes sure that:
+
+ OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+ where the POINTER_PLUS and *VAR have the same precision as
+ TYPE (and the same precision as sizetype). Then:
+
+	     (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.
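+
+	 As an illustrative example of the last case: if OP0 is a pointer
+	 p_1 that splits as POINTER_PLUS <q_2, 16> and TYPE is a
+	 like-sized unsigned integral type, the result is
+	 *VAR = (sizetype) q_2 and *OFF = 16. */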
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
|| (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
- && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
- && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
+ && (POINTER_TYPE_P (type)
+ || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
+ && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
+ || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
+ && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
{
- if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ if (POINTER_TYPE_P (type))
{
- /* Split the unconverted operand and try to prove that
- wrapping isn't a problem. */
- tree tmp_var, tmp_off;
- split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
-
- /* See whether we have an SSA_NAME whose range is known
- to be [A, B]. */
- if (TREE_CODE (tmp_var) != SSA_NAME)
- return false;
- wide_int var_min, var_max;
- value_range_kind vr_type = get_range_info (tmp_var, &var_min,
- &var_max);
- wide_int var_nonzero = get_nonzero_bits (tmp_var);
- signop sgn = TYPE_SIGN (itype);
- if (intersect_range_with_nonzero_bits (vr_type, &var_min,
- &var_max, var_nonzero,
- sgn) != VR_RANGE)
- return false;
-
- /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
- is known to be [A + TMP_OFF, B + TMP_OFF], with all
- operations done in ITYPE. The addition must overflow
- at both ends of the range or at neither. */
- wi::overflow_type overflow[2];
- unsigned int prec = TYPE_PRECISION (itype);
- wide_int woff = wi::to_wide (tmp_off, prec);
- wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
- wi::add (var_max, woff, sgn, &overflow[1]);
- if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
- return false;
-
- /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
- widest_int diff = (widest_int::from (op0_min, sgn)
- - widest_int::from (var_min, sgn));
- var0 = tmp_var;
- *off = wide_int_to_tree (ssizetype, diff);
+ split_constant_offset (op0, var, off, nullptr, cache, limit);
+ *var = fold_convert (type, *var);
+ }
+ else if (POINTER_TYPE_P (itype))
+ {
+ split_constant_offset (op0, var, off, nullptr, cache, limit);
+ *var = fold_convert (sizetype, *var);
}
else
- split_constant_offset (op0, &var0, off, cache, limit);
- *var = fold_convert (type, var0);
+ {
+ split_constant_offset (op0, var, off, &op0_range,
+ cache, limit);
+ if (!nop_conversion_for_offset_p (type, itype, op0_range))
+ return false;
+ if (result_range)
+ {
+ *result_range = op0_range;
+ range_cast (*result_range, type);
+ }
+ }
return true;
}
return false;
}
}
-/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
- will be ssizetype. */
+/* If EXP has pointer type, try to express it as:
+
+ POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+ where:
+
+ - *VAR has the same type as EXP
+ - *OFF is a constant of type ssizetype.
+
+ If EXP has an integral type, try to express (sizetype) EXP as:
+
+ *VAR + (sizetype) *OFF
+
+ where:
+
+ - *VAR has type sizetype
+ - *OFF is a constant of type ssizetype.
+
+ If EXP_RANGE is nonnull, set it to the range of EXP.
+
+ CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
+ visited. LIMIT counts down the number of SSA names that we are
+   allowed to process before giving up.
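+
+   For example (illustrative): a pointer EXP of the form p_1 p+ 12
+   yields *VAR = p_1 and *OFF = 12, while an integer EXP of the form
+   i_1 + 4, in a type whose overflow is undefined, yields
+   *VAR = (sizetype) i_1 and *OFF = 4. */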
static void
-split_constant_offset (tree exp, tree *var, tree *off,
+split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
hash_map<tree, std::pair<tree, tree> > &cache,
unsigned *limit)
{
- tree type = TREE_TYPE (exp), op0, op1, e, o;
+ tree type = TREE_TYPE (exp), op0, op1;
enum tree_code code;
- *var = exp;
- *off = ssize_int (0);
-
- if (tree_is_chrec (exp)
- || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
- return;
-
code = TREE_CODE (exp);
- extract_ops_from_tree (exp, &code, &op0, &op1);
- if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
+ if (exp_range)
{
- *var = e;
- *off = o;
+ *exp_range = type;
+ if (code == SSA_NAME)
+ {
+ value_range vr;
+ get_range_query (cfun)->range_of_expr (vr, exp);
+ if (vr.undefined_p ())
+ vr.set_varying (TREE_TYPE (exp));
+ wide_int var_min = wi::to_wide (vr.min ());
+ wide_int var_max = wi::to_wide (vr.max ());
+ value_range_kind vr_kind = vr.kind ();
+ wide_int var_nonzero = get_nonzero_bits (exp);
+ vr_kind = intersect_range_with_nonzero_bits (vr_kind,
+ &var_min, &var_max,
+ var_nonzero,
+ TYPE_SIGN (type));
+ /* This check for VR_VARYING is here because the old code
+ using get_range_info would return VR_RANGE for the entire
+ domain, instead of VR_VARYING. The new code normalizes
+ full-domain ranges to VR_VARYING. */
+ if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
+ *exp_range = value_range (type, var_min, var_max);
+ }
}
+
+ if (!tree_is_chrec (exp)
+ && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
+ {
+ extract_ops_from_tree (exp, &code, &op0, &op1);
+ if (split_constant_offset_1 (type, op0, code, op1, var, off,
+ exp_range, cache, limit))
+ return;
+ }
+
+ *var = exp;
+ if (INTEGRAL_TYPE_P (type))
+ *var = fold_convert (sizetype, *var);
+ *off = ssize_int (0);
+
+ value_range r;
+ if (exp_range && code != SSA_NAME
+ && get_range_query (cfun)->range_of_expr (r, exp)
+ && !r.undefined_p ())
+ *exp_range = r;
}
+/* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
+ type as EXP while OFF has type ssizetype. */
+
void
split_constant_offset (tree exp, tree *var, tree *off)
{
static hash_map<tree, std::pair<tree, tree> > *cache;
if (!cache)
cache = new hash_map<tree, std::pair<tree, tree> > (37);
- split_constant_offset (exp, var, off, *cache, &limit);
+ split_constant_offset (exp, var, off, nullptr, *cache, &limit);
+ *var = fold_convert (TREE_TYPE (exp), *var);
cache->empty ();
}
}
}
+/* Return whether references based on BASE can have components that
+   satisfy access_fn_component_p. */
+
+static bool
+base_supports_access_fn_components_p (tree base)
+{
+ switch (TREE_CODE (TREE_TYPE (base)))
+ {
+ case COMPLEX_TYPE:
+ case ARRAY_TYPE:
+ case RECORD_TYPE:
+ return true;
+ default:
+ return false;
+ }
+}
+
-/* Determines the base object and the list of indices of memory reference
-   DR, analyzed in LOOP and instantiated before NEST. */
+/* Determine the base object and the list of indices of memory reference
+   REF, storing the results in *DRI.  Analyzed in LOOP and instantiated
+   before NEST. */
static void
-dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
+dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
- vec<tree> access_fns = vNULL;
- tree ref, op;
- tree base, off, access_fn;
-
/* If analyzing a basic-block there are no indices to analyze
and thus no access functions. */
if (!nest)
{
- DR_BASE_OBJECT (dr) = DR_REF (dr);
- DR_ACCESS_FNS (dr).create (0);
+ dri->base_object = ref;
+ dri->access_fns.create (0);
return;
}
- ref = DR_REF (dr);
+ vec<tree> access_fns = vNULL;
/* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
into a two element array with a constant index. The base is
{
if (TREE_CODE (ref) == ARRAY_REF)
{
- op = TREE_OPERAND (ref, 1);
- access_fn = analyze_scalar_evolution (loop, op);
+ tree op = TREE_OPERAND (ref, 1);
+ tree access_fn = analyze_scalar_evolution (loop, op);
access_fn = instantiate_scev (nest, loop, access_fn);
access_fns.safe_push (access_fn);
}
analyzed nest, add it as an additional independent access-function. */
if (TREE_CODE (ref) == MEM_REF)
{
- op = TREE_OPERAND (ref, 0);
- access_fn = analyze_scalar_evolution (loop, op);
+ tree op = TREE_OPERAND (ref, 0);
+ tree access_fn = analyze_scalar_evolution (loop, op);
access_fn = instantiate_scev (nest, loop, access_fn);
+ STRIP_NOPS (access_fn);
if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
{
- tree orig_type;
tree memoff = TREE_OPERAND (ref, 1);
- base = initial_condition (access_fn);
- orig_type = TREE_TYPE (base);
+ tree base = initial_condition (access_fn);
+ tree orig_type = TREE_TYPE (base);
STRIP_USELESS_TYPE_CONVERSION (base);
+ tree off;
split_constant_offset (base, &base, &off);
STRIP_USELESS_TYPE_CONVERSION (base);
/* Fold the MEM_REF offset into the evolutions initial
base, memoff);
MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
- DR_UNCONSTRAINED_BASE (dr) = true;
+ dri->unconstrained_base = true;
access_fns.safe_push (access_fn);
}
}
build_int_cst (reference_alias_ptr_type (ref), 0));
}
- DR_BASE_OBJECT (dr) = ref;
- DR_ACCESS_FNS (dr) = access_fns;
+ dri->base_object = ref;
+ dri->access_fns = access_fns;
}
/* Extracts the alias analysis information from the memory reference DR. */
free_data_ref (data_reference_p dr)
{
DR_ACCESS_FNS (dr).release ();
+ if (dr->alt_indices.base_object)
+ dr->alt_indices.access_fns.release ();
free (dr);
}
dr_analyze_innermost (&DR_INNERMOST (dr), memref,
nest != NULL ? loop : NULL, stmt);
- dr_analyze_indices (dr, nest, loop);
+ dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
dr_analyze_alias (dr);
if (dump_file && (dump_flags & TDF_DETAILS))
bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
- unsigned int i;
- for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
+ int found = -1;
+ for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
{
tree access1 = DR_ACCESS_FN (dr_a.dr, i);
tree access2 = DR_ACCESS_FN (dr_b.dr, i);
return false;
}
- /* The two indices must have the same step. */
- if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ if (found >= 0)
return false;
+ found = i;
+ }
- tree idx_step = CHREC_RIGHT (access1);
- /* Index must have const step, otherwise DR_STEP won't be constant. */
- gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
- /* Index must evaluate in the same direction as DR. */
- gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
+ /* Ought not to happen in practice, since if all accesses are equal then the
+ alias should be decidable at compile time. */
+ if (found < 0)
+ return false;
- tree min1 = CHREC_LEFT (access1);
- tree min2 = CHREC_LEFT (access2);
- if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
- return false;
+ /* The two indices must have the same step. */
+ tree access1 = DR_ACCESS_FN (dr_a.dr, found);
+ tree access2 = DR_ACCESS_FN (dr_b.dr, found);
+ if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+ return false;
- /* Ideally, alias can be checked against loop's control IV, but we
- need to prove linear mapping between control IV and reference
- index. Although that should be true, we check against (array)
- index of data reference. Like segment length, index length is
- linear function of the number of iterations with index_step as
- the coefficient, i.e, niter_len * idx_step. */
- offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
- SIGNED);
- if (neg_step)
- abs_idx_step = -abs_idx_step;
- poly_offset_int idx_len1 = abs_idx_step * niter_len1;
- poly_offset_int idx_len2 = abs_idx_step * niter_len2;
- poly_offset_int idx_access1 = abs_idx_step * niter_access1;
- poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+ tree idx_step = CHREC_RIGHT (access1);
+ /* Index must have const step, otherwise DR_STEP won't be constant. */
+ gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
+ /* Index must evaluate in the same direction as DR. */
+ gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
- gcc_assert (known_ge (idx_len1, 0)
- && known_ge (idx_len2, 0)
- && known_ge (idx_access1, 0)
- && known_ge (idx_access2, 0));
+ tree min1 = CHREC_LEFT (access1);
+ tree min2 = CHREC_LEFT (access2);
+ if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
+ return false;
- /* Each access has the following pattern, with lengths measured
- in units of INDEX:
+ /* Ideally, alias can be checked against loop's control IV, but we
+ need to prove linear mapping between control IV and reference
+ index. Although that should be true, we check against (array)
+ index of data reference. Like segment length, index length is
+ linear function of the number of iterations with index_step as
+ the coefficient, i.e, niter_len * idx_step. */
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+ SIGNED);
+ if (neg_step)
+ abs_idx_step = -abs_idx_step;
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
- <-- idx_len -->
- <--- A: -ve step --->
- +-----+-------+-----+-------+-----+
- | n-1 | ..... | 0 | ..... | n-1 |
- +-----+-------+-----+-------+-----+
- <--- B: +ve step --->
- <-- idx_len -->
- |
- min
+ gcc_assert (known_ge (idx_len1, 0)
+ && known_ge (idx_len2, 0)
+ && known_ge (idx_access1, 0)
+ && known_ge (idx_access2, 0));
- where "n" is the number of scalar iterations covered by the segment
- and where each access spans idx_access units.
+ /* Each access has the following pattern, with lengths measured
+ in units of INDEX:
- A is the range of bytes accessed when the step is negative,
- B is the range when the step is positive.
+ <-- idx_len -->
+ <--- A: -ve step --->
+ +-----+-------+-----+-------+-----+
+ | n-1 | ..... | 0 | ..... | n-1 |
+ +-----+-------+-----+-------+-----+
+ <--- B: +ve step --->
+ <-- idx_len -->
+ |
+ min
- When checking for general overlap, we need to test whether
- the range:
+ where "n" is the number of scalar iterations covered by the segment
+ and where each access spans idx_access units.
- [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
- overlaps:
+ When checking for general overlap, we need to test whether
+ the range:
- [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+ [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
- where:
+ overlaps:
- low_offsetN = +ve step ? 0 : -idx_lenN;
- high_offsetN = +ve step ? idx_lenN : 0;
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
- This is equivalent to testing whether:
+ where:
- min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
- && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+ low_offsetN = +ve step ? 0 : -idx_lenN;
+ high_offsetN = +ve step ? idx_lenN : 0;
- Converting this into a single test, there is an overlap if:
+ This is equivalent to testing whether:
- 0 <= min2 - min1 + bias <= limit
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+
+ Converting this into a single test, there is an overlap if:
- where bias = high_offset2 + idx_access2 - 1 - low_offset1
- limit = (high_offset1 - low_offset1 + idx_access1 - 1)
- + (high_offset2 - low_offset2 + idx_access2 - 1)
- i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+ 0 <= min2 - min1 + bias <= limit
- Combining the tests requires limit to be computable in an unsigned
- form of the index type; if it isn't, we fall back to the usual
- pointer-based checks.
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
- We can do better if DR_B is a write and if DR_A and DR_B are
- well-ordered in both the original and the new code (see the
- comment above the DR_ALIAS_* flags for details). In this case
- we know that for each i in [0, n-1], the write performed by
- access i of DR_B occurs after access numbers j<=i of DR_A in
- both the original and the new code. Any write or anti
- dependencies wrt those DR_A accesses are therefore maintained.
+ Combining the tests requires limit to be computable in an unsigned
+ form of the index type; if it isn't, we fall back to the usual
+ pointer-based checks.
- We just need to make sure that each individual write in DR_B does not
- overlap any higher-indexed access in DR_A; such DR_A accesses happen
- after the DR_B access in the original code but happen before it in
- the new code.
+ We can do better if DR_B is a write and if DR_A and DR_B are
+ well-ordered in both the original and the new code (see the
+ comment above the DR_ALIAS_* flags for details). In this case
+ we know that for each i in [0, n-1], the write performed by
+ access i of DR_B occurs after access numbers j<=i of DR_A in
+ both the original and the new code. Any write or anti
+ dependencies wrt those DR_A accesses are therefore maintained.
+
+ We just need to make sure that each individual write in DR_B does not
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
+ after the DR_B access in the original code but happen before it in
+ the new code.
- We know the steps for both accesses are equal, so by induction, we
- just need to test whether the first write of DR_B overlaps a later
- access of DR_A. In other words, we need to move min1 along by
- one iteration:
+ We know the steps for both accesses are equal, so by induction, we
+ just need to test whether the first write of DR_B overlaps a later
+ access of DR_A. In other words, we need to move min1 along by
+ one iteration:
- min1' = min1 + idx_step
+ min1' = min1 + idx_step
- and use the ranges:
+ and use the ranges:
- [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
- and:
+ and:
- [min2, min2 + idx_access2 - 1]
+ [min2, min2 + idx_access2 - 1]
- where:
+ where:
- low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
- high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
- if (waw_or_war_p)
- idx_len1 -= abs_idx_step;
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+     high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.
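+
+     As an illustrative sanity check of the general case: with a positive
+     step, !WAW_OR_WAR_P, idx_len1 = idx_len2 = 16 and idx_access1 =
+     idx_access2 = 1, we get bias = 16 and limit = 32, so the test
+     0 <= min2 - min1 + bias <= limit accepts exactly |min2 - min1| <= 16,
+     i.e. the cases in which [min1, min1 + 16] and [min2, min2 + 16]
+     overlap. */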
+ if (waw_or_war_p)
+ idx_len1 -= abs_idx_step;
- poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
- if (!waw_or_war_p)
- limit += idx_len2;
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+ if (!waw_or_war_p)
+ limit += idx_len2;
- tree utype = unsigned_type_for (TREE_TYPE (min1));
- if (!wi::fits_to_tree_p (limit, utype))
- return false;
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
+ if (!wi::fits_to_tree_p (limit, utype))
+ return false;
- poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
- poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
- poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
- /* Equivalent to adding IDX_STEP to MIN1. */
- if (waw_or_war_p)
- bias -= wi::to_offset (idx_step);
-
- tree subject = fold_build2 (MINUS_EXPR, utype,
- fold_convert (utype, min2),
- fold_convert (utype, min1));
- subject = fold_build2 (PLUS_EXPR, utype, subject,
- wide_int_to_tree (utype, bias));
- tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
- wide_int_to_tree (utype, limit));
- if (*cond_expr)
- *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- *cond_expr, part_cond_expr);
- else
- *cond_expr = part_cond_expr;
- }
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+ /* Equivalent to adding IDX_STEP to MIN1. */
+ if (waw_or_war_p)
+ bias -= wi::to_offset (idx_step);
+
+ tree subject = fold_build2 (MINUS_EXPR, utype,
+ fold_convert (utype, min2),
+ fold_convert (utype, min1));
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
+ wide_int_to_tree (utype, bias));
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+ wide_int_to_tree (utype, limit));
+ if (*cond_expr)
+ *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ *cond_expr, part_cond_expr);
+ else
+ *cond_expr = part_cond_expr;
if (dump_enabled_p ())
{
if (waw_or_war_p)
void
create_runtime_alias_checks (class loop *loop,
- vec<dr_with_seg_len_pair_t> *alias_pairs,
+ const vec<dr_with_seg_len_pair_t> *alias_pairs,
tree * cond_expr)
{
tree part_cond_expr;
fold_defer_overflow_warnings ();
- dr_with_seg_len_pair_t *alias_pair;
- unsigned int i;
- FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
+ for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
{
- gcc_assert (alias_pair->flags);
+ gcc_assert (alias_pair.flags);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
"create runtime check for data references %T and %T\n",
- DR_REF (alias_pair->first.dr),
- DR_REF (alias_pair->second.dr));
+ DR_REF (alias_pair.first.dr),
+ DR_REF (alias_pair.second.dr));
/* Create condition expression for each pair data references. */
- create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
+ create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
*cond_expr, part_cond_expr);
TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}
-/* Initialize a data dependence relation between data accesses A and
- B. NB_LOOPS is the number of loops surrounding the references: the
- size of the classic distance/direction vectors. */
+/* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
+ is true when the main indices of A and B were not comparable so we try again
+   with alternate indices computed on an indirect reference.
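+
+   For example (illustrative): when the main analysis compares an
+   ARRAY_REF such as a[i_1] against a MEM_REF such as *p_2, the two
+   index sequences are not comparable, so the ARRAY_REF is re-analyzed
+   as the equivalent MEM_REF[&a[i_1]] and the comparison is retried. */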
struct data_dependence_relation *
-initialize_data_dependence_relation (struct data_reference *a,
- struct data_reference *b,
- vec<loop_p> loop_nest)
+initialize_data_dependence_relation (struct data_dependence_relation *res,
+ vec<loop_p> loop_nest,
+ bool use_alt_indices)
{
- struct data_dependence_relation *res;
+ struct data_reference *a = DDR_A (res);
+ struct data_reference *b = DDR_B (res);
unsigned int i;
- res = XCNEW (struct data_dependence_relation);
- DDR_A (res) = a;
- DDR_B (res) = b;
- DDR_LOOP_NEST (res).create (0);
- DDR_SUBSCRIPTS (res).create (0);
- DDR_DIR_VECTS (res).create (0);
- DDR_DIST_VECTS (res).create (0);
-
- if (a == NULL || b == NULL)
- {
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
- return res;
- }
-
- /* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
+ struct indices *indices_a = &a->indices;
+ struct indices *indices_b = &b->indices;
+ if (use_alt_indices)
{
- DDR_ARE_DEPENDENT (res) = chrec_known;
- return res;
+ if (TREE_CODE (DR_REF (a)) != MEM_REF)
+ indices_a = &a->alt_indices;
+ if (TREE_CODE (DR_REF (b)) != MEM_REF)
+ indices_b = &b->alt_indices;
}
-
- unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
- unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
+ unsigned int num_dimensions_a = indices_a->access_fns.length ();
+ unsigned int num_dimensions_b = indices_b->access_fns.length ();
if (num_dimensions_a == 0 || num_dimensions_b == 0)
{
DDR_ARE_DEPENDENT (res) = chrec_dont_know;
the a and b accesses have a single ARRAY_REF component reference [0]
but have two subscripts. */
- if (DR_UNCONSTRAINED_BASE (a))
+ if (indices_a->unconstrained_base)
num_dimensions_a -= 1;
- if (DR_UNCONSTRAINED_BASE (b))
+ if (indices_b->unconstrained_base)
num_dimensions_b -= 1;
/* These structures describe sequences of component references in
B: [3, 4] (i.e. s.e) */
while (index_a < num_dimensions_a && index_b < num_dimensions_b)
{
+ /* The alternate indices form always has a single dimension
+ with unconstrained base. */
+ gcc_assert (!use_alt_indices);
+
/* REF_A and REF_B must be one of the component access types
allowed by dr_analyze_indices. */
gcc_checking_assert (access_fn_component_p (ref_a));
/* See whether FULL_SEQ ends at the base and whether the two bases
are equal. We do not care about TBAA or alignment info so we can
use OEP_ADDRESS_OF to avoid false negatives. */
- tree base_a = DR_BASE_OBJECT (a);
- tree base_b = DR_BASE_OBJECT (b);
+ tree base_a = indices_a->base_object;
+ tree base_b = indices_b->base_object;
bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
&& full_seq.start_b + full_seq.length == num_dimensions_b
- && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
+ && (indices_a->unconstrained_base
+ == indices_b->unconstrained_base)
&& operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
- && types_compatible_p (TREE_TYPE (base_a),
- TREE_TYPE (base_b))
+ && (types_compatible_p (TREE_TYPE (base_a),
+ TREE_TYPE (base_b))
+ || (!base_supports_access_fn_components_p (base_a)
+ && !base_supports_access_fn_components_p (base_b)
+ && operand_equal_p
+ (TYPE_SIZE (TREE_TYPE (base_a)),
+ TYPE_SIZE (TREE_TYPE (base_b)), 0)))
&& (!loop_nest.exists ()
|| (object_address_invariant_in_loop_p
(loop_nest[0], base_a))));
both lvalues are distinct from the object's declared type. */
if (same_base_p)
{
- if (DR_UNCONSTRAINED_BASE (a))
+ if (indices_a->unconstrained_base)
full_seq.length += 1;
}
else
/* Punt if we didn't find a suitable sequence. */
if (full_seq.length == 0)
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
- return res;
+ if (use_alt_indices
+ || (TREE_CODE (DR_REF (a)) == MEM_REF
+ && TREE_CODE (DR_REF (b)) == MEM_REF)
+ || may_be_nonaddressable_p (DR_REF (a))
+ || may_be_nonaddressable_p (DR_REF (b)))
+ {
+ /* Fully exhausted possibilities. */
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
+ }
+
+ /* Try evaluating both DRs as dereferences of pointers. */
+ if (!a->alt_indices.base_object
+ && TREE_CODE (DR_REF (a)) != MEM_REF)
+ {
+ tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
+ build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
+ build_int_cst
+ (reference_alias_ptr_type (DR_REF (a)), 0));
+ dr_analyze_indices (&a->alt_indices, alt_ref,
+ loop_preheader_edge (loop_nest[0]),
+ loop_containing_stmt (DR_STMT (a)));
+ }
+ if (!b->alt_indices.base_object
+ && TREE_CODE (DR_REF (b)) != MEM_REF)
+ {
+ tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
+ build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
+ build_int_cst
+ (reference_alias_ptr_type (DR_REF (b)), 0));
+ dr_analyze_indices (&b->alt_indices, alt_ref,
+ loop_preheader_edge (loop_nest[0]),
+ loop_containing_stmt (DR_STMT (b)));
+ }
+ return initialize_data_dependence_relation (res, loop_nest, true);
}
if (!same_base_p)
struct subscript *subscript;
subscript = XNEW (struct subscript);
- SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
- SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
+ SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
+ SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
return res;
}
+/* Initialize a data dependence relation between data accesses A and
+ B. NB_LOOPS is the number of loops surrounding the references: the
+ size of the classic distance/direction vectors. */
+
+struct data_dependence_relation *
+initialize_data_dependence_relation (struct data_reference *a,
+ struct data_reference *b,
+ vec<loop_p> loop_nest)
+{
+ data_dependence_relation *res = XCNEW (struct data_dependence_relation);
+ DDR_A (res) = a;
+ DDR_B (res) = b;
+ DDR_LOOP_NEST (res).create (0);
+ DDR_SUBSCRIPTS (res).create (0);
+ DDR_DIR_VECTS (res).create (0);
+ DDR_DIST_VECTS (res).create (0);
+
+ if (a == NULL || b == NULL)
+ {
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
+ }
+
+ /* If the data references do not alias, then they are independent. */
+ if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
+ {
+ DDR_ARE_DEPENDENT (res) = chrec_known;
+ return res;
+ }
+
+ return initialize_data_dependence_relation (res, loop_nest, false);
+}
+
+
/* Frees memory used by the conflict function F. */
static void
static void
free_subscripts (vec<subscript_p> subscripts)
{
- unsigned i;
- subscript_p s;
-
- FOR_EACH_VEC_ELT (subscripts, i, s)
+ for (subscript_p s : subscripts)
{
free_conflict_function (s->conflicting_iterations_in_a);
free_conflict_function (s->conflicting_iterations_in_b);
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ HOST_WIDE_INT chrec_right;
if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
return chrec_dont_know;
- A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
+ chrec_right = int_cst_value (CHREC_RIGHT (chrec));
+ /* We want to be able to negate without overflow. */
+ if (chrec_right == HOST_WIDE_INT_MIN)
+ return chrec_dont_know;
+ A[index][0] = mult * chrec_right;
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
case PLUS_EXPR:
/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
R2 = R2 + CONST1 * R1. */
-static void
+static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
lambda_int const1)
{
int i;
if (const1 == 0)
- return;
+ return true;
for (i = 0; i < n; i++)
- mat[r2][i] += const1 * mat[r1][i];
+ {
+ bool ovf;
+ lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
+ if (ovf)
+ return false;
+ lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
+ if (ovf || tem2 == HOST_WIDE_INT_MIN)
+ return false;
+ mat[r2][i] = tem2;
+ }
+
+ return true;
}
/* Multiply vector VEC1 of length SIZE by a constant CONST1,
Ref: Algorithm 2.1 page 33 in "Loop Transformations for
Restructuring Compilers" Utpal Banerjee. */
-static void
+static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
lambda_matrix S, lambda_matrix U)
{
{
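+      /* The loop below performs a Euclidean-style reduction of column J:
+         as an illustrative example, entries (4, 6) reduce to (2, 0), i.e.
+         their gcd ends up on top, while U accumulates the corresponding
+         row operations. */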
while (S[i][j] != 0)
{
- lambda_int sigma, factor, a, b;
+ lambda_int factor, a, b;
a = S[i-1][j];
b = S[i][j];
- sigma = (a * b < 0) ? -1: 1;
- a = abs_hwi (a);
- b = abs_hwi (b);
- factor = sigma * (a / b);
+ gcc_assert (a != HOST_WIDE_INT_MIN);
+ factor = a / b;
- lambda_matrix_row_add (S, n, i, i-1, -factor);
+ if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
+ return false;
std::swap (S[i], S[i-1]);
- lambda_matrix_row_add (U, m, i, i-1, -factor);
+ if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
+ return false;
std::swap (U[i], U[i-1]);
}
}
}
}
+
+ return true;
}
/* Determines the overlapping elements due to accesses CHREC_A and
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT gamma, gcd_alpha_beta;
+ lambda_int gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
}
/* U.A = S */
- lambda_matrix_right_hermite (A, dim, 1, S, U);
+ if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
+ {
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
if (S[0][0] < 0)
{
static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
- unsigned i;
- lambda_vector v;
-
- FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
+ for (lambda_vector v : DDR_DIST_VECTS (ddr))
if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
return;
static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
- unsigned i;
- lambda_vector v;
-
- FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
+ for (lambda_vector v : DDR_DIR_VECTS (ddr))
if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
return;
non_affine_dependence_relation (ddr);
return false;
}
+ else
+ *init_b = true;
}
return true;
}
-/* Return true when the DDR contains only constant access functions. */
+/* Return true when the DDR contains only access functions that are invariant
+   with respect to loop number LNUM. */
static bool
-constant_access_functions (const struct data_dependence_relation *ddr)
+invariant_access_functions (const struct data_dependence_relation *ddr,
+ int lnum)
{
- unsigned i;
- subscript *sub;
-
- FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
- if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
- || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
+ for (subscript *sub : DDR_SUBSCRIPTS (ddr))
+ if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
+ || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
return false;
return true;
static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
- unsigned i;
- subscript *sub;
-
- FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
+ for (subscript *sub : DDR_SUBSCRIPTS (ddr))
if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
SUB_ACCESS_FN (sub, 1)))
return false;
dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
save_dist_v (ddr, dist_v);
- if (constant_access_functions (ddr))
+ if (invariant_access_functions (ddr, loop_nest->num))
add_distance_for_zero_overlaps (ddr);
if (DDR_NB_LOOPS (ddr) > 1)
access_functions_are_affine_or_constant_p (const struct data_reference *a,
const class loop *loop_nest)
{
- unsigned int i;
vec<tree> fns = DR_ACCESS_FNS (a);
- tree t;
-
- FOR_EACH_VEC_ELT (fns, i, t)
+ for (tree t : fns)
if (!evolution_function_is_invariant_p (t, loop_nest->num)
&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
return false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "(compute_affine_dependence\n");
- fprintf (dump_file, " stmt_a: ");
+ fprintf (dump_file, " ref_a: ");
+ print_generic_expr (dump_file, DR_REF (dra));
+ fprintf (dump_file, ", stmt_a: ");
print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
- fprintf (dump_file, " stmt_b: ");
+ fprintf (dump_file, " ref_b: ");
+ print_generic_expr (dump_file, DR_REF (drb));
+ fprintf (dump_file, ", stmt_b: ");
print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
}
is small enough to be handled. */
bool
-compute_all_dependences (vec<data_reference_p> datarefs,
+compute_all_dependences (const vec<data_reference_p> &datarefs,
vec<ddr_p> *dependence_relations,
- vec<loop_p> loop_nest,
+ const vec<loop_p> &loop_nest,
bool compute_self_and_rr)
{
struct data_dependence_relation *ddr;
find_data_references_in_stmt (class loop *nest, gimple *stmt,
vec<data_reference_p> *datarefs)
{
- unsigned i;
auto_vec<data_ref_loc, 2> references;
- data_ref_loc *ref;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
return opt_result::failure_at (stmt, "statement clobbers memory: %G",
stmt);
- FOR_EACH_VEC_ELT (references, i, ref)
+ for (const data_ref_loc &ref : references)
{
dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
- loop_containing_stmt (stmt), ref->ref,
- stmt, ref->is_read, ref->is_conditional_in_stmt);
+ loop_containing_stmt (stmt), ref.ref,
+ stmt, ref.is_read, ref.is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
}
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
vec<data_reference_p> *datarefs)
{
- unsigned i;
auto_vec<data_ref_loc, 2> references;
- data_ref_loc *ref;
bool ret = true;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
return false;
- FOR_EACH_VEC_ELT (references, i, ref)
+ for (const data_ref_loc &ref : references)
{
- dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
- ref->is_conditional_in_stmt);
+ dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
+ ref.is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
}
DEPENDENCE_RELATIONS. */
void
-free_dependence_relations (vec<ddr_p> dependence_relations)
+free_dependence_relations (vec<ddr_p>& dependence_relations)
{
- unsigned int i;
- struct data_dependence_relation *ddr;
-
- FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
+ for (data_dependence_relation *ddr : dependence_relations)
if (ddr)
free_dependence_relation (ddr);
/* Free the memory used by the data references from DATAREFS. */
void
-free_data_refs (vec<data_reference_p> datarefs)
+free_data_refs (vec<data_reference_p>& datarefs)
{
- unsigned int i;
- struct data_reference *dr;
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
+ for (data_reference *dr : datarefs)
free_data_ref (dr);
datarefs.release ();
}
/* Get the range of values that the unconverted step actually has. */
wide_int step_min, step_max;
+ value_range vr;
if (TREE_CODE (step) != SSA_NAME
- || get_range_info (step, &step_min, &step_max) != VR_RANGE)
+ || !get_range_query (cfun)->range_of_expr (vr, step)
+ || vr.kind () != VR_RANGE)
{
step_min = wi::to_wide (TYPE_MIN_VALUE (type));
step_max = wi::to_wide (TYPE_MAX_VALUE (type));
}
+ else
+ {
+ step_min = vr.lower_bound ();
+ step_max = vr.upper_bound ();
+ }
/* Check whether the unconverted step has an acceptable range. */
signop sgn = TYPE_SIGN (type);