/* Data references and dependences detectors.
- Copyright (C) 2003-2018 Free Software Foundation, Inc.
+ Copyright (C) 2003-2019 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
#include "tree-affine.h"
#include "params.h"
#include "builtins.h"
-#include "stringpool.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
+#include "tree-eh.h"
+#include "ssa.h"
static struct datadep_stats
{
int i;
for (i = 0; i < n; i++)
- fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "%3d ", (int) vector[i]);
fprintf (outfile, "\n");
}
dump_subscript (outf, sub);
}
- fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
fprintf (outf, " loop nest: (");
FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
fprintf (outf, "%d ", loopi->num);
dump_ddrs (stderr, ddrs);
}
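+/* Forward declaration of the cache-aware form of split_constant_offset;
+   it is mutually recursive with split_constant_offset_1 below.  */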
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache);
+
/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
(the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
constant of type ssizetype, and returns true. If we cannot do this
with OP0 CODE OP1, returns false. */
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
- tree *var, tree *off)
+ tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache)
{
tree var0, var1;
tree off0, off1;
/* FALLTHROUGH */
case PLUS_EXPR:
case MINUS_EXPR:
- split_constant_offset (op0, &var0, &off0);
- split_constant_offset (op1, &var1, &off1);
+ split_constant_offset (op0, &var0, &off0, cache);
+ split_constant_offset (op1, &var1, &off1, cache);
*var = fold_build2 (code, type, var0, var1);
*off = size_binop (ocode, off0, off1);
return true;
if (TREE_CODE (op1) != INTEGER_CST)
return false;
- split_constant_offset (op0, &var0, &off0);
+ split_constant_offset (op0, &var0, &off0, cache);
*var = fold_build2 (MULT_EXPR, type, var0, op1);
*off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
return true;
if (poffset)
{
- split_constant_offset (poffset, &poffset, &off1);
+ split_constant_offset (poffset, &poffset, &off1, cache);
off0 = size_binop (PLUS_EXPR, off0, off1);
if (POINTER_TYPE_P (TREE_TYPE (base)))
base = fold_build_pointer_plus (base, poffset);
if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
return false;
- var0 = gimple_assign_rhs1 (def_stmt);
subcode = gimple_assign_rhs_code (def_stmt);
+
+ /* We are using a cache to avoid un-CSEing large amounts of code. */
+ bool use_cache = false;
+ if (!has_single_use (op0)
+ && (subcode == POINTER_PLUS_EXPR
+ || subcode == PLUS_EXPR
+ || subcode == MINUS_EXPR
+ || subcode == MULT_EXPR
+ || subcode == ADDR_EXPR
+ || CONVERT_EXPR_CODE_P (subcode)))
+ {
+ use_cache = true;
+ bool existed;
+ std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
+ if (existed)
+ {
+ if (integer_zerop (e.second))
+ return false;
+ *var = e.first;
+ *off = e.second;
+ return true;
+ }
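+ /* Seed the cache with a conservative zero-offset entry for OP0 so
+ that a recursive or repeated query hits the integer_zerop check
+ above and fails fast; the real result is stored below once the
+ split succeeds.  */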
+ e = std::make_pair (op0, ssize_int (0));
+ }
+
+ var0 = gimple_assign_rhs1 (def_stmt);
var1 = gimple_assign_rhs2 (def_stmt);
- return split_constant_offset_1 (type, var0, subcode, var1, var, off);
+ bool res = split_constant_offset_1 (type, var0, subcode, var1,
+ var, off, cache);
+ if (res && use_cache)
+ *cache.get (op0) = std::make_pair (*var, *off);
+ return res;
}
CASE_CONVERT:
{
- /* We must not introduce undefined overflow, and we must not change the value.
- Hence we're okay if the inner type doesn't overflow to start with
- (pointer or signed), the outer type also is an integer or pointer
- and the outer precision is at least as large as the inner. */
+ /* We must not introduce undefined overflow, and we must not change
+ the value.  Hence we're okay if the inner type doesn't overflow
+ to start with (pointer or signed), the outer type also is an
+ integer or pointer and the outer precision is at least as large
+ as the inner.  For wrapping inner types, the code below proves
+ via range information that the conversion cannot actually wrap.  */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off, cache);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ value_range_kind vr_type = get_range_info (tmp_var, &var_min,
+ &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ signop sgn = TYPE_SIGN (itype);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max, var_nonzero,
+ sgn) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
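+ /* If the addition wrapped at only one end, OP0 - TMP_VAR would
+ differ by 2^PREC somewhere in the range, so no single constant
+ offset would be correct.  */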
+ wi::overflow_type overflow[2];
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off, cache);
*var = fold_convert (type, var0);
return true;
}
/* Expresses EXP as VAR + OFF, where OFF is a constant. The type of OFF
will be ssizetype. */
-void
-split_constant_offset (tree exp, tree *var, tree *off)
+static void
+split_constant_offset (tree exp, tree *var, tree *off,
+ hash_map<tree, std::pair<tree, tree> > &cache)
{
tree type = TREE_TYPE (exp), op0, op1, e, o;
enum tree_code code;
code = TREE_CODE (exp);
extract_ops_from_tree (exp, &code, &op0, &op1);
- if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
+ if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache))
{
*var = e;
*off = o;
}
}
+void
+split_constant_offset (tree exp, tree *var, tree *off)
+{
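+ /* The cache is allocated once and reused to save allocation
+ overhead; its entries are only meaningful within a single
+ top-level call, so flush it before returning.  */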
+ static hash_map<tree, std::pair<tree, tree> > *cache;
+ if (!cache)
+ cache = new hash_map<tree, std::pair<tree, tree> > (37);
+ split_constant_offset (exp, var, off, *cache);
+ cache->empty ();
+}
+
/* Returns the address ADDR of an object in a canonical shape (without nop
casts, and with type of pointer to the object). */
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyze the behavior of memory reference REF. There are two modes:
+/* Analyze the behavior of memory reference REF within STMT.
+ There are two modes:
- BB analysis. In this case we simply split the address into base,
init and offset components, without reference to any containing loop.
Return true if the analysis succeeded and store the results in DRB if so.
BB analysis can only fail for bitfield or reversed-storage accesses. */
-bool
+opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
- struct loop *loop)
+ struct loop *loop, const gimple *stmt)
{
poly_int64 pbitsize, pbitpos;
tree base, poffset;
poly_int64 pbytepos;
if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: bit offset alignment.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: bit offset alignment.\n");
if (preversep)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: reverse storage order.\n");
- return false;
- }
+ return opt_result::failure_at (stmt,
+ "failed: reverse storage order.\n");
/* Calculate the alignment and misalignment for the inner reference. */
unsigned HOST_WIDE_INT bit_base_misalignment;
if (in_loop)
{
if (!simple_iv (loop, loop, base, &base_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of base is not affine.\n");
}
else
{
offset_iv.step = ssize_int (0);
}
else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not affine.\n");
- return false;
- }
+ return opt_result::failure_at
+ (stmt, "failed: evolution of offset is not affine.\n");
}
init = ssize_int (pbytepos);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "success.\n");
- return true;
+ return opt_result::success ();
}
/* Return true if OP is a valid component reference for a DR access
DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
dr_analyze_innermost (&DR_INNERMOST (dr), memref,
- nest != NULL ? loop : NULL);
+ nest != NULL ? loop : NULL, stmt);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
return dr;
}
-/* A helper function computes order between two tree epxressions T1 and T2.
+/* A helper function computes order between two tree expressions T1 and T2.
This is used in comparator functions sorting objects based on the order
of tree expressions. The function returns -1, 0, or 1. */
/* Return TRUE if it's possible to resolve data dependence DDR by runtime alias
check. */
-bool
+opt_result
runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "consider run-time aliasing test between %T and %T\n",
+ DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
if (!speed_p)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported when optimizing "
- "for size.\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported when"
+ " optimizing for size.\n");
/* FORNOW: We don't support versioning with outer-loop in either
vectorization or loop distribution. */
if (loop != NULL && loop->inner != NULL)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for outer loop.\n");
- return false;
- }
+ return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
+ "runtime alias check not supported for"
+ " outer loop.\n");
- return true;
+ return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.
if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "found equal ranges ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
alias_pairs->ordered_remove (i--);
continue;
}
dr_a1->align = MIN (dr_a1->align, new_align);
}
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
+ DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
+ DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
alias_pairs->ordered_remove (i);
i--;
}
tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
DR_OFFSET (d.dr));
addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
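+ /* SEG_LEN may involve arithmetic with undefined overflow; rewrite
+ it so that evaluating it in the runtime check cannot invoke
+ undefined behavior.  */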
- tree seg_len = fold_convert (sizetype, d.seg_len);
+ tree seg_len
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
seg_len, size_zero_node);
{
tree part_cond_expr;
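+ /* Folding the generated checks can create spurious overflow
+ warnings about the user's code; defer them and throw them away
+ once the condition has been built.  */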
+ fold_defer_overflow_warnings ();
for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
{
const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "create runtime check for data references ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
- dump_printf (MSG_NOTE, "\n");
- }
+ dump_printf (MSG_NOTE,
+ "create runtime check for data references %T and %T\n",
+ DR_REF (dr_a.dr), DR_REF (dr_b.dr));
/* Create the condition expression for each pair of data references. */
create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
else
*cond_expr = part_cond_expr;
}
+ fold_undefer_and_ignore_overflow_warnings ();
}
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
expressions. */
{
if (TREE_CODE (obj) == ARRAY_REF)
{
- /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
- need to check the stride and the lower bound of the reference. */
- if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
- loop->num)
- || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
- loop->num))
- return false;
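+ /* The index, the stride and the lower bound of the reference
+ (operands 1 to 3 of the ARRAY_REF) must not be defined inside
+ the loop.  */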
+ for (int i = 1; i < 4; ++i)
+ if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
+ loop->num))
+ return false;
}
else if (TREE_CODE (obj) == COMPONENT_REF)
{
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
- bool loop_nest)
+ struct loop *loop_nest)
{
tree addr_a = DR_BASE_OBJECT (a);
tree addr_b = DR_BASE_OBJECT (b);
if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
&& (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
+ /* For cross-iteration dependences the cliques must be valid for the
+ whole loop, not just individual iterations. */
+ && (!loop_nest
+ || MR_DEPENDENCE_CLIQUE (addr_a) == 1
+ || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
&& MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
&& MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
return false;
}
/* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b, loop_nest.exists ()))
+ if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
{
DDR_ARE_DEPENDENT (res) = chrec_known;
return res;
DDR_ARE_DEPENDENT (res) = NULL_TREE;
DDR_SUBSCRIPTS (res).create (full_seq.length);
DDR_LOOP_NEST (res) = loop_nest;
- DDR_INNER_LOOP (res) = 0;
DDR_SELF_REFERENCE (res) = false;
for (i = 0; i < full_seq.length; ++i)
{
if (value0 == false)
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
}
else
{
- if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
+ if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
+ || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "siv test failed: chrec not positive.\n");
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
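+ /* Punt if the step is not an INTEGER_CST that fits in a
+ HOST_WIDE_INT, since int_cst_value cannot handle it.  */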
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
mat[i][j] = (i == j) ? 1 : 0;
}
-/* Return the first nonzero element of vector VEC1 between START and N.
- We must have START <= N. Returns N if VEC1 is the zero vector. */
+/* Return the index of the first nonzero element of vector VEC1 between
+ START and N. We must have START <= N.
+ Returns N if VEC1 is the zero vector. */
static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
R2 = R2 + CONST1 * R1. */
static void
-lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
+ lambda_int const1)
{
int i;
static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
- int size, int const1)
+ int size, lambda_int const1)
{
int i;
{
while (S[i][j] != 0)
{
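+ /* One Euclidean reduction step on column J: subtract
+ SIGMA * floor (|a| / |b|) times row I from row I-1, reducing
+ S[I-1][J] to its remainder (rounded towards zero) on division
+ by S[I][J].  */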
- int sigma, factor, a, b;
+ lambda_int sigma, factor, a, b;
a = S[i-1][j];
b = S[i][j];
sigma = (a * b < 0) ? -1: 1;
- a = abs (a);
- b = abs (b);
+ a = abs_hwi (a);
+ b = abs_hwi (b);
factor = sigma * (a / b);
lambda_matrix_row_add (S, n, i, i-1, -factor);
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
if (niter > 0)
{
- HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
- FLOOR_DIV (niter_b - j0, j1));
- HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
-
/* If the overlap occurs outside of the bounds of the
loop, there is no dependence. */
if (x1 >= niter_a || y1 >= niter_b)
*last_conflicts = integer_zero_node;
goto end_analyze_subs_aa;
}
+
+ /* max stmt executions can get quite large, avoid
+ overflows by using wide ints here. */
+ widest_int tau2
+ = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
+ wi::sdiv_floor (wi::sub (niter_b, j0), j1));
+ widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
+ if (wi::min_precision (last_conflict, SIGNED)
+ <= TYPE_PRECISION (integer_type_node))
+ *last_conflicts
+ = build_int_cst (integer_type_node,
+ last_conflict.to_shwi ());
else
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ *last_conflicts = chrec_dont_know;
}
else
*last_conflicts = chrec_dont_know;
}
else if (evolution_function_is_constant_p (difference)
- /* For the moment, the following is verified:
- evolution_function_is_affine_multivariate_p (chrec_a,
- loop_nest->num) */
+ && evolution_function_is_affine_multivariate_p (chrec_a,
+ loop_nest->num)
&& !gcd_of_steps_may_divide_p (chrec_a, difference))
{
/* testsuite/.../ssa-chrec-33.c
dependence_stats.num_miv_independent++;
}
- else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
- && !chrec_contains_symbols (chrec_a)
- && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
- && !chrec_contains_symbols (chrec_b))
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
+ && !chrec_contains_symbols (chrec_a, loop_nest)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
+ && !chrec_contains_symbols (chrec_b, loop_nest))
{
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
{
unsigned i;
lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
+ struct loop *loop = DDR_LOOP_NEST (ddr)[0];
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
{
return false;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in a loop nest nested in that loop, we
+ can end up with more access functions than loops.  Skip access
+ functions of loops not in the loop nest.
+
+ See PR89725 for more information.  */
+ if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
+ continue;
+
dist = int_cst_value (SUB_DISTANCE (subscript));
index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
*index_carry = MIN (index, *index_carry);
unsigned i;
int index_carry = DDR_NB_LOOPS (ddr);
subscript *sub;
+ struct loop *loop = DDR_LOOP_NEST (ddr)[0];
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
{
if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
{
- if (!evolution_function_is_univariate_p (access_fun))
+ if (!evolution_function_is_univariate_p (access_fun, loop->num))
{
if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
{
return;
}
+ /* When data references are collected in a loop while data
+ dependences are analyzed in a loop nest nested in that loop, we
+ can end up with more access functions than loops.  Skip access
+ functions of loops not in the loop nest.
+
+ See PR89725 for more information.  */
+ if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
+ loop))
+ continue;
+
index_carry = MIN (index_carry,
index_in_loop_nest (CHREC_VARIABLE (access_fun),
DDR_LOOP_NEST (ddr)));
{
lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- dist_v[DDR_INNER_LOOP (ddr)] = 1;
+ dist_v[0] = 1;
save_dist_v (ddr, dist_v);
}
reference, returns false, otherwise returns true. NEST is the outermost
loop of the loop nest in which the references should be analyzed. */
-bool
+opt_result
find_data_references_in_stmt (struct loop *nest, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
auto_vec<data_ref_loc, 2> references;
data_ref_loc *ref;
- bool ret = true;
data_reference_p dr;
if (get_references_in_stmt (stmt, &references))
- return false;
+ return opt_result::failure_at (stmt, "statement clobbers memory: %G",
+ stmt);
FOR_EACH_VEC_ELT (references, i, ref)
{
datarefs->safe_push (dr);
}
- return ret;
+ return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS. If there is an
return alignment;
}
+/* If BASE is a pointer-typed SSA name, try to find the object that it
+ is based on. Return this object X on success and store the alignment
+ in bytes of BASE - &X in *ALIGNMENT_OUT. */
+
+static tree
+get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
+{
+ if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ gimple *def = SSA_NAME_DEF_STMT (base);
+ base = analyze_scalar_evolution (loop_containing_stmt (def), base);
+
+ /* Peel chrecs and record the minimum alignment preserved by
+ all steps. */
+ unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ while (TREE_CODE (base) == POLYNOMIAL_CHREC)
+ {
+ unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
+ alignment = MIN (alignment, step_alignment);
+ base = CHREC_LEFT (base);
+ }
+
+ /* Punt if the expression is too complicated to handle. */
+ if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
+ return NULL_TREE;
+
+ /* The only useful cases are those for which a dereference folds to something
+ other than an INDIRECT_REF. */
+ tree ref_type = TREE_TYPE (TREE_TYPE (base));
+ tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
+ if (!ref)
+ return NULL_TREE;
+
+ /* Analyze the base to which the steps we peeled were applied. */
+ poly_int64 bitsize, bitpos, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset;
+ base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ return NULL_TREE;
+
+ /* Restrict the alignment to that guaranteed by the offsets. */
+ unsigned int bytepos_alignment = known_alignment (bytepos);
+ if (bytepos_alignment != 0)
+ alignment = MIN (alignment, bytepos_alignment);
+ if (offset)
+ {
+ unsigned int offset_alignment = highest_pow2_factor (offset);
+ alignment = MIN (alignment, offset_alignment);
+ }
+
+ *alignment_out = alignment;
+ return base;
+}
+
+/* Return the object whose alignment would need to be changed in order
+ to increase the alignment of ADDR. Store the maximum achievable
+ alignment in *MAX_ALIGNMENT. */
+
+tree
+get_base_for_alignment (tree addr, unsigned int *max_alignment)
+{
+ tree base = get_base_for_alignment_1 (addr, max_alignment);
+ if (base)
+ return base;
+
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
+ return addr;
+}
+
/* Recursive helper function. */
static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
tree step = DR_STEP (dr);
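+ /* Punt if no step information is available for the reference.  */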
+ if (!step)
+ return NULL_TREE;
STRIP_NOPS (step);
/* Look for cases where the step is scaled by a positive constant
integer, which will often be the access size. If the multiplication