1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
/* Counters for the several kinds of dependence tests performed; dumped
   with the pass statistics.  All counters start zero-initialized because
   the instance has static storage duration.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
/* Forward declaration: tests the dependence of the subscripts of two
   data references; the definition appears later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
130 /* Returns true iff A divides B. */
133 tree_fold_divides_p (const_tree a
, const_tree b
)
135 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
136 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
/* Returns true (nonzero) iff A divides B.  A must be nonzero, since
   B % A is undefined for A == 0.  */

static inline int
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
148 /* Return true if reference REF contains a union access. */
151 ref_contains_union_access_p (tree ref
)
153 while (handled_component_p (ref
))
155 ref
= TREE_OPERAND (ref
, 0);
156 if (TREE_CODE (TREE_TYPE (ref
)) == UNION_TYPE
157 || TREE_CODE (TREE_TYPE (ref
)) == QUAL_UNION_TYPE
)
165 /* Dump into FILE all the data references from DATAREFS. */
168 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
171 struct data_reference
*dr
;
173 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
174 dump_data_reference (file
, dr
);
177 /* Unified dump into FILE all the data references from DATAREFS. */
180 debug (vec
<data_reference_p
> &ref
)
182 dump_data_references (stderr
, ref
);
186 debug (vec
<data_reference_p
> *ptr
)
191 fprintf (stderr
, "<nil>\n");
195 /* Dump into STDERR all the data references from DATAREFS. */
198 debug_data_references (vec
<data_reference_p
> datarefs
)
200 dump_data_references (stderr
, datarefs
);
203 /* Print to STDERR the data_reference DR. */
206 debug_data_reference (struct data_reference
*dr
)
208 dump_data_reference (stderr
, dr
);
211 /* Dump function for a DATA_REFERENCE structure. */
214 dump_data_reference (FILE *outf
,
215 struct data_reference
*dr
)
219 fprintf (outf
, "#(Data Ref: \n");
220 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
221 fprintf (outf
, "# stmt: ");
222 print_gimple_stmt (outf
, DR_STMT (dr
), 0);
223 fprintf (outf
, "# ref: ");
224 print_generic_stmt (outf
, DR_REF (dr
));
225 fprintf (outf
, "# base_object: ");
226 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
));
228 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
230 fprintf (outf
, "# Access function %d: ", i
);
231 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
));
233 fprintf (outf
, "#)\n");
236 /* Unified dump function for a DATA_REFERENCE structure. */
239 debug (data_reference
&ref
)
241 dump_data_reference (stderr
, &ref
);
245 debug (data_reference
*ptr
)
250 fprintf (stderr
, "<nil>\n");
254 /* Dumps the affine function described by FN to the file OUTF. */
257 dump_affine_function (FILE *outf
, affine_fn fn
)
262 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
263 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
265 fprintf (outf
, " + ");
266 print_generic_expr (outf
, coef
, TDF_SLIM
);
267 fprintf (outf
, " * x_%u", i
);
271 /* Dumps the conflict function CF to the file OUTF. */
274 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
278 if (cf
->n
== NO_DEPENDENCE
)
279 fprintf (outf
, "no dependence");
280 else if (cf
->n
== NOT_KNOWN
)
281 fprintf (outf
, "not known");
284 for (i
= 0; i
< cf
->n
; i
++)
289 dump_affine_function (outf
, cf
->fns
[i
]);
295 /* Dump function for a SUBSCRIPT structure. */
298 dump_subscript (FILE *outf
, struct subscript
*subscript
)
300 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
302 fprintf (outf
, "\n (subscript \n");
303 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
304 dump_conflict_function (outf
, cf
);
305 if (CF_NONTRIVIAL_P (cf
))
307 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
308 fprintf (outf
, "\n last_conflict: ");
309 print_generic_expr (outf
, last_iteration
);
312 cf
= SUB_CONFLICTS_IN_B (subscript
);
313 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
314 dump_conflict_function (outf
, cf
);
315 if (CF_NONTRIVIAL_P (cf
))
317 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
318 fprintf (outf
, "\n last_conflict: ");
319 print_generic_expr (outf
, last_iteration
);
322 fprintf (outf
, "\n (Subscript distance: ");
323 print_generic_expr (outf
, SUB_DISTANCE (subscript
));
324 fprintf (outf
, " ))\n");
327 /* Print the classic direction vector DIRV to OUTF. */
330 print_direction_vector (FILE *outf
,
336 for (eq
= 0; eq
< length
; eq
++)
338 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
344 fprintf (outf
, " +");
347 fprintf (outf
, " -");
350 fprintf (outf
, " =");
352 case dir_positive_or_equal
:
353 fprintf (outf
, " +=");
355 case dir_positive_or_negative
:
356 fprintf (outf
, " +-");
358 case dir_negative_or_equal
:
359 fprintf (outf
, " -=");
362 fprintf (outf
, " *");
365 fprintf (outf
, "indep");
369 fprintf (outf
, "\n");
372 /* Print a vector of direction vectors. */
375 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
381 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
382 print_direction_vector (outf
, v
, length
);
385 /* Print out a vector VEC of length N to OUTFILE. */
388 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
392 for (i
= 0; i
< n
; i
++)
393 fprintf (outfile
, "%3d ", (int)vector
[i
]);
394 fprintf (outfile
, "\n");
397 /* Print a vector of distance vectors. */
400 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
406 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
407 print_lambda_vector (outf
, v
, length
);
410 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
413 dump_data_dependence_relation (FILE *outf
,
414 struct data_dependence_relation
*ddr
)
416 struct data_reference
*dra
, *drb
;
418 fprintf (outf
, "(Data Dep: \n");
420 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
427 dump_data_reference (outf
, dra
);
429 fprintf (outf
, " (nil)\n");
431 dump_data_reference (outf
, drb
);
433 fprintf (outf
, " (nil)\n");
435 fprintf (outf
, " (don't know)\n)\n");
441 dump_data_reference (outf
, dra
);
442 dump_data_reference (outf
, drb
);
444 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
445 fprintf (outf
, " (no dependence)\n");
447 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
453 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
455 fprintf (outf
, " access_fn_A: ");
456 print_generic_stmt (outf
, SUB_ACCESS_FN (sub
, 0));
457 fprintf (outf
, " access_fn_B: ");
458 print_generic_stmt (outf
, SUB_ACCESS_FN (sub
, 1));
459 dump_subscript (outf
, sub
);
462 fprintf (outf
, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
464 fprintf (outf
, "%d ", loopi
->num
);
465 fprintf (outf
, ")\n");
467 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
469 fprintf (outf
, " distance_vector: ");
470 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
474 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
476 fprintf (outf
, " direction_vector: ");
477 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
482 fprintf (outf
, ")\n");
488 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
490 dump_data_dependence_relation (stderr
, ddr
);
493 /* Dump into FILE all the dependence relations from DDRS. */
496 dump_data_dependence_relations (FILE *file
,
500 struct data_dependence_relation
*ddr
;
502 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
503 dump_data_dependence_relation (file
, ddr
);
507 debug (vec
<ddr_p
> &ref
)
509 dump_data_dependence_relations (stderr
, ref
);
513 debug (vec
<ddr_p
> *ptr
)
518 fprintf (stderr
, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
525 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
527 dump_data_dependence_relations (stderr
, ddrs
);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
536 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
539 struct data_dependence_relation
*ddr
;
542 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
543 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
547 fprintf (file
, "DISTANCE_V (");
548 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
549 fprintf (file
, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
554 fprintf (file
, "DIRECTION_V (");
555 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
556 fprintf (file
, ")\n");
560 fprintf (file
, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
566 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
569 struct data_dependence_relation
*ddr
;
571 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
572 dump_data_dependence_relation (file
, ddr
);
574 fprintf (file
, "\n\n");
578 debug_ddrs (vec
<ddr_p
> ddrs
)
580 dump_ddrs (stderr
, ddrs
);
584 split_constant_offset (tree exp
, tree
*var
, tree
*off
,
585 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
588 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
589 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
590 constant of type ssizetype, and returns true. If we cannot do this
591 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
595 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
596 tree
*var
, tree
*off
,
597 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
602 enum tree_code ocode
= code
;
610 *var
= build_int_cst (type
, 0);
611 *off
= fold_convert (ssizetype
, op0
);
614 case POINTER_PLUS_EXPR
:
619 if (TREE_CODE (op1
) == INTEGER_CST
)
621 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
623 *off
= size_binop (ocode
, off0
, fold_convert (ssizetype
, op1
));
626 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
627 split_constant_offset (op1
, &var1
, &off1
, cache
, limit
);
628 *var
= fold_build2 (code
, type
, var0
, var1
);
629 *off
= size_binop (ocode
, off0
, off1
);
633 if (TREE_CODE (op1
) != INTEGER_CST
)
636 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
637 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
638 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
644 poly_int64 pbitsize
, pbitpos
, pbytepos
;
646 int punsignedp
, preversep
, pvolatilep
;
648 op0
= TREE_OPERAND (op0
, 0);
650 = get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
651 &punsignedp
, &preversep
, &pvolatilep
);
653 if (!multiple_p (pbitpos
, BITS_PER_UNIT
, &pbytepos
))
655 base
= build_fold_addr_expr (base
);
656 off0
= ssize_int (pbytepos
);
660 split_constant_offset (poffset
, &poffset
, &off1
, cache
, limit
);
661 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
662 if (POINTER_TYPE_P (TREE_TYPE (base
)))
663 base
= fold_build_pointer_plus (base
, poffset
);
665 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
666 fold_convert (TREE_TYPE (base
), poffset
));
669 var0
= fold_convert (type
, base
);
671 /* If variable length types are involved, punt, otherwise casts
672 might be converted into ARRAY_REFs in gimplify_conversion.
673 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
674 possibly no longer appears in current GIMPLE, might resurface.
675 This perhaps could run
676 if (CONVERT_EXPR_P (var0))
678 gimplify_conversion (&var0);
679 // Attempt to fill in any within var0 found ARRAY_REF's
680 // element size from corresponding op embedded ARRAY_REF,
681 // if unsuccessful, just punt.
683 while (POINTER_TYPE_P (type
))
684 type
= TREE_TYPE (type
);
685 if (int_size_in_bytes (type
) < 0)
695 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
698 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op0
);
699 enum tree_code subcode
;
701 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
704 subcode
= gimple_assign_rhs_code (def_stmt
);
706 /* We are using a cache to avoid un-CSEing large amounts of code. */
707 bool use_cache
= false;
708 if (!has_single_use (op0
)
709 && (subcode
== POINTER_PLUS_EXPR
710 || subcode
== PLUS_EXPR
711 || subcode
== MINUS_EXPR
712 || subcode
== MULT_EXPR
713 || subcode
== ADDR_EXPR
714 || CONVERT_EXPR_CODE_P (subcode
)))
718 std::pair
<tree
, tree
> &e
= cache
.get_or_insert (op0
, &existed
);
721 if (integer_zerop (e
.second
))
727 e
= std::make_pair (op0
, ssize_int (0));
734 var0
= gimple_assign_rhs1 (def_stmt
);
735 var1
= gimple_assign_rhs2 (def_stmt
);
737 bool res
= split_constant_offset_1 (type
, var0
, subcode
, var1
,
738 var
, off
, cache
, limit
);
739 if (res
&& use_cache
)
740 *cache
.get (op0
) = std::make_pair (*var
, *off
);
745 /* We must not introduce undefined overflow, and we must not change
746 the value. Hence we're okay if the inner type doesn't overflow
747 to start with (pointer or signed), the outer type also is an
748 integer or pointer and the outer precision is at least as large
750 tree itype
= TREE_TYPE (op0
);
751 if ((POINTER_TYPE_P (itype
)
752 || (INTEGRAL_TYPE_P (itype
) && !TYPE_OVERFLOW_TRAPS (itype
)))
753 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
754 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
756 if (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_WRAPS (itype
))
758 /* Split the unconverted operand and try to prove that
759 wrapping isn't a problem. */
760 tree tmp_var
, tmp_off
;
761 split_constant_offset (op0
, &tmp_var
, &tmp_off
, cache
, limit
);
763 /* See whether we have an SSA_NAME whose range is known
765 if (TREE_CODE (tmp_var
) != SSA_NAME
)
767 wide_int var_min
, var_max
;
768 value_range_kind vr_type
= get_range_info (tmp_var
, &var_min
,
770 wide_int var_nonzero
= get_nonzero_bits (tmp_var
);
771 signop sgn
= TYPE_SIGN (itype
);
772 if (intersect_range_with_nonzero_bits (vr_type
, &var_min
,
773 &var_max
, var_nonzero
,
777 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
778 is known to be [A + TMP_OFF, B + TMP_OFF], with all
779 operations done in ITYPE. The addition must overflow
780 at both ends of the range or at neither. */
781 wi::overflow_type overflow
[2];
782 unsigned int prec
= TYPE_PRECISION (itype
);
783 wide_int woff
= wi::to_wide (tmp_off
, prec
);
784 wide_int op0_min
= wi::add (var_min
, woff
, sgn
, &overflow
[0]);
785 wi::add (var_max
, woff
, sgn
, &overflow
[1]);
786 if ((overflow
[0] != wi::OVF_NONE
) != (overflow
[1] != wi::OVF_NONE
))
789 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
790 widest_int diff
= (widest_int::from (op0_min
, sgn
)
791 - widest_int::from (var_min
, sgn
));
793 *off
= wide_int_to_tree (ssizetype
, diff
);
796 split_constant_offset (op0
, &var0
, off
, cache
, limit
);
797 *var
= fold_convert (type
, var0
);
808 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
809 will be ssizetype. */
812 split_constant_offset (tree exp
, tree
*var
, tree
*off
,
813 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
816 tree type
= TREE_TYPE (exp
), op0
, op1
, e
, o
;
820 *off
= ssize_int (0);
822 if (tree_is_chrec (exp
)
823 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
826 code
= TREE_CODE (exp
);
827 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
828 if (split_constant_offset_1 (type
, op0
, code
, op1
, &e
, &o
, cache
, limit
))
836 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
838 unsigned limit
= param_ssa_name_def_chain_limit
;
839 static hash_map
<tree
, std::pair
<tree
, tree
> > *cache
;
841 cache
= new hash_map
<tree
, std::pair
<tree
, tree
> > (37);
842 split_constant_offset (exp
, var
, off
, *cache
, &limit
);
846 /* Returns the address ADDR of an object in a canonical shape (without nop
847 casts, and with type of pointer to the object). */
850 canonicalize_base_object_address (tree addr
)
856 /* The base address may be obtained by casting from integer, in that case
858 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
861 if (TREE_CODE (addr
) != ADDR_EXPR
)
864 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
867 /* Analyze the behavior of memory reference REF within STMT.
870 - BB analysis. In this case we simply split the address into base,
871 init and offset components, without reference to any containing loop.
872 The resulting base and offset are general expressions and they can
873 vary arbitrarily from one iteration of the containing loop to the next.
874 The step is always zero.
876 - loop analysis. In this case we analyze the reference both wrt LOOP
877 and on the basis that the reference occurs (is "used") in LOOP;
878 see the comment above analyze_scalar_evolution_in_loop for more
879 information about this distinction. The base, init, offset and
880 step fields are all invariant in LOOP.
882 Perform BB analysis if LOOP is null, or if LOOP is the function's
883 dummy outermost loop. In other cases perform loop analysis.
885 Return true if the analysis succeeded and store the results in DRB if so.
886 BB analysis can only fail for bitfield or reversed-storage accesses. */
889 dr_analyze_innermost (innermost_loop_behavior
*drb
, tree ref
,
890 class loop
*loop
, const gimple
*stmt
)
892 poly_int64 pbitsize
, pbitpos
;
895 int punsignedp
, preversep
, pvolatilep
;
896 affine_iv base_iv
, offset_iv
;
897 tree init
, dinit
, step
;
898 bool in_loop
= (loop
&& loop
->num
);
900 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
901 fprintf (dump_file
, "analyze_innermost: ");
903 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
904 &punsignedp
, &preversep
, &pvolatilep
);
905 gcc_assert (base
!= NULL_TREE
);
908 if (!multiple_p (pbitpos
, BITS_PER_UNIT
, &pbytepos
))
909 return opt_result::failure_at (stmt
,
910 "failed: bit offset alignment.\n");
913 return opt_result::failure_at (stmt
,
914 "failed: reverse storage order.\n");
916 /* Calculate the alignment and misalignment for the inner reference. */
917 unsigned int HOST_WIDE_INT bit_base_misalignment
;
918 unsigned int bit_base_alignment
;
919 get_object_alignment_1 (base
, &bit_base_alignment
, &bit_base_misalignment
);
921 /* There are no bitfield references remaining in BASE, so the values
922 we got back must be whole bytes. */
923 gcc_assert (bit_base_alignment
% BITS_PER_UNIT
== 0
924 && bit_base_misalignment
% BITS_PER_UNIT
== 0);
925 unsigned int base_alignment
= bit_base_alignment
/ BITS_PER_UNIT
;
926 poly_int64 base_misalignment
= bit_base_misalignment
/ BITS_PER_UNIT
;
928 if (TREE_CODE (base
) == MEM_REF
)
930 if (!integer_zerop (TREE_OPERAND (base
, 1)))
932 /* Subtract MOFF from the base and add it to POFFSET instead.
933 Adjust the misalignment to reflect the amount we subtracted. */
934 poly_offset_int moff
= mem_ref_offset (base
);
935 base_misalignment
-= moff
.force_shwi ();
936 tree mofft
= wide_int_to_tree (sizetype
, moff
);
940 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
942 base
= TREE_OPERAND (base
, 0);
945 base
= build_fold_addr_expr (base
);
949 if (!simple_iv (loop
, loop
, base
, &base_iv
, true))
950 return opt_result::failure_at
951 (stmt
, "failed: evolution of base is not affine.\n");
956 base_iv
.step
= ssize_int (0);
957 base_iv
.no_overflow
= true;
962 offset_iv
.base
= ssize_int (0);
963 offset_iv
.step
= ssize_int (0);
969 offset_iv
.base
= poffset
;
970 offset_iv
.step
= ssize_int (0);
972 else if (!simple_iv (loop
, loop
, poffset
, &offset_iv
, true))
973 return opt_result::failure_at
974 (stmt
, "failed: evolution of offset is not affine.\n");
977 init
= ssize_int (pbytepos
);
979 /* Subtract any constant component from the base and add it to INIT instead.
980 Adjust the misalignment to reflect the amount we subtracted. */
981 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
982 init
= size_binop (PLUS_EXPR
, init
, dinit
);
983 base_misalignment
-= TREE_INT_CST_LOW (dinit
);
985 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
986 init
= size_binop (PLUS_EXPR
, init
, dinit
);
988 step
= size_binop (PLUS_EXPR
,
989 fold_convert (ssizetype
, base_iv
.step
),
990 fold_convert (ssizetype
, offset_iv
.step
));
992 base
= canonicalize_base_object_address (base_iv
.base
);
994 /* See if get_pointer_alignment can guarantee a higher alignment than
995 the one we calculated above. */
996 unsigned int HOST_WIDE_INT alt_misalignment
;
997 unsigned int alt_alignment
;
998 get_pointer_alignment_1 (base
, &alt_alignment
, &alt_misalignment
);
1000 /* As above, these values must be whole bytes. */
1001 gcc_assert (alt_alignment
% BITS_PER_UNIT
== 0
1002 && alt_misalignment
% BITS_PER_UNIT
== 0);
1003 alt_alignment
/= BITS_PER_UNIT
;
1004 alt_misalignment
/= BITS_PER_UNIT
;
1006 if (base_alignment
< alt_alignment
)
1008 base_alignment
= alt_alignment
;
1009 base_misalignment
= alt_misalignment
;
1012 drb
->base_address
= base
;
1013 drb
->offset
= fold_convert (ssizetype
, offset_iv
.base
);
1016 if (known_misalignment (base_misalignment
, base_alignment
,
1017 &drb
->base_misalignment
))
1018 drb
->base_alignment
= base_alignment
;
1021 drb
->base_alignment
= known_alignment (base_misalignment
);
1022 drb
->base_misalignment
= 0;
1024 drb
->offset_alignment
= highest_pow2_factor (offset_iv
.base
);
1025 drb
->step_alignment
= highest_pow2_factor (step
);
1027 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1028 fprintf (dump_file
, "success.\n");
1030 return opt_result::success ();
1033 /* Return true if OP is a valid component reference for a DR access
1034 function. This accepts a subset of what handled_component_p accepts. */
1037 access_fn_component_p (tree op
)
1039 switch (TREE_CODE (op
))
1047 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op
, 0))) == RECORD_TYPE
;
1054 /* Determines the base object and the list of indices of memory reference
1055 DR, analyzed in LOOP and instantiated before NEST. */
1058 dr_analyze_indices (struct data_reference
*dr
, edge nest
, loop_p loop
)
1060 vec
<tree
> access_fns
= vNULL
;
1062 tree base
, off
, access_fn
;
1064 /* If analyzing a basic-block there are no indices to analyze
1065 and thus no access functions. */
1068 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
1069 DR_ACCESS_FNS (dr
).create (0);
1075 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1076 into a two element array with a constant index. The base is
1077 then just the immediate underlying object. */
1078 if (TREE_CODE (ref
) == REALPART_EXPR
)
1080 ref
= TREE_OPERAND (ref
, 0);
1081 access_fns
.safe_push (integer_zero_node
);
1083 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
1085 ref
= TREE_OPERAND (ref
, 0);
1086 access_fns
.safe_push (integer_one_node
);
1089 /* Analyze access functions of dimensions we know to be independent.
1090 The list of component references handled here should be kept in
1091 sync with access_fn_component_p. */
1092 while (handled_component_p (ref
))
1094 if (TREE_CODE (ref
) == ARRAY_REF
)
1096 op
= TREE_OPERAND (ref
, 1);
1097 access_fn
= analyze_scalar_evolution (loop
, op
);
1098 access_fn
= instantiate_scev (nest
, loop
, access_fn
);
1099 access_fns
.safe_push (access_fn
);
1101 else if (TREE_CODE (ref
) == COMPONENT_REF
1102 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
1104 /* For COMPONENT_REFs of records (but not unions!) use the
1105 FIELD_DECL offset as constant access function so we can
1106 disambiguate a[i].f1 and a[i].f2. */
1107 tree off
= component_ref_field_offset (ref
);
1108 off
= size_binop (PLUS_EXPR
,
1109 size_binop (MULT_EXPR
,
1110 fold_convert (bitsizetype
, off
),
1111 bitsize_int (BITS_PER_UNIT
)),
1112 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
1113 access_fns
.safe_push (off
);
1116 /* If we have an unhandled component we could not translate
1117 to an access function stop analyzing. We have determined
1118 our base object in this case. */
1121 ref
= TREE_OPERAND (ref
, 0);
1124 /* If the address operand of a MEM_REF base has an evolution in the
1125 analyzed nest, add it as an additional independent access-function. */
1126 if (TREE_CODE (ref
) == MEM_REF
)
1128 op
= TREE_OPERAND (ref
, 0);
1129 access_fn
= analyze_scalar_evolution (loop
, op
);
1130 access_fn
= instantiate_scev (nest
, loop
, access_fn
);
1131 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
1134 tree memoff
= TREE_OPERAND (ref
, 1);
1135 base
= initial_condition (access_fn
);
1136 orig_type
= TREE_TYPE (base
);
1137 STRIP_USELESS_TYPE_CONVERSION (base
);
1138 split_constant_offset (base
, &base
, &off
);
1139 STRIP_USELESS_TYPE_CONVERSION (base
);
1140 /* Fold the MEM_REF offset into the evolutions initial
1141 value to make more bases comparable. */
1142 if (!integer_zerop (memoff
))
1144 off
= size_binop (PLUS_EXPR
, off
,
1145 fold_convert (ssizetype
, memoff
));
1146 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
1148 /* Adjust the offset so it is a multiple of the access type
1149 size and thus we separate bases that can possibly be used
1150 to produce partial overlaps (which the access_fn machinery
1153 if (TYPE_SIZE_UNIT (TREE_TYPE (ref
))
1154 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref
))) == INTEGER_CST
1155 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref
))))
1158 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref
))),
1161 /* If we can't compute the remainder simply force the initial
1162 condition to zero. */
1163 rem
= wi::to_wide (off
);
1164 off
= wide_int_to_tree (ssizetype
, wi::to_wide (off
) - rem
);
1165 memoff
= wide_int_to_tree (TREE_TYPE (memoff
), rem
);
1166 /* And finally replace the initial condition. */
1167 access_fn
= chrec_replace_initial_condition
1168 (access_fn
, fold_convert (orig_type
, off
));
1169 /* ??? This is still not a suitable base object for
1170 dr_may_alias_p - the base object needs to be an
1171 access that covers the object as whole. With
1172 an evolution in the pointer this cannot be
1174 As a band-aid, mark the access so we can special-case
1175 it in dr_may_alias_p. */
1177 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1178 MEM_REF
, TREE_TYPE (ref
),
1180 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1181 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1182 DR_UNCONSTRAINED_BASE (dr
) = true;
1183 access_fns
.safe_push (access_fn
);
1186 else if (DECL_P (ref
))
1188 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1189 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1190 build_fold_addr_expr (ref
),
1191 build_int_cst (reference_alias_ptr_type (ref
), 0));
1194 DR_BASE_OBJECT (dr
) = ref
;
1195 DR_ACCESS_FNS (dr
) = access_fns
;
1198 /* Extracts the alias analysis information from the memory reference DR. */
1201 dr_analyze_alias (struct data_reference
*dr
)
1203 tree ref
= DR_REF (dr
);
1204 tree base
= get_base_address (ref
), addr
;
1206 if (INDIRECT_REF_P (base
)
1207 || TREE_CODE (base
) == MEM_REF
)
1209 addr
= TREE_OPERAND (base
, 0);
1210 if (TREE_CODE (addr
) == SSA_NAME
)
1211 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1215 /* Frees data reference DR. */
1218 free_data_ref (data_reference_p dr
)
1220 DR_ACCESS_FNS (dr
).release ();
1224 /* Analyze memory reference MEMREF, which is accessed in STMT.
1225 The reference is a read if IS_READ is true, otherwise it is a write.
1226 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1227 within STMT, i.e. that it might not occur even if STMT is executed
1228 and runs to completion.
1230 Return the data_reference description of MEMREF. NEST is the outermost
1231 loop in which the reference should be instantiated, LOOP is the loop
1232 in which the data reference should be analyzed. */
1234 struct data_reference
*
1235 create_data_ref (edge nest
, loop_p loop
, tree memref
, gimple
*stmt
,
1236 bool is_read
, bool is_conditional_in_stmt
)
1238 struct data_reference
*dr
;
1240 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1242 fprintf (dump_file
, "Creating dr for ");
1243 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1244 fprintf (dump_file
, "\n");
1247 dr
= XCNEW (struct data_reference
);
1248 DR_STMT (dr
) = stmt
;
1249 DR_REF (dr
) = memref
;
1250 DR_IS_READ (dr
) = is_read
;
1251 DR_IS_CONDITIONAL_IN_STMT (dr
) = is_conditional_in_stmt
;
1253 dr_analyze_innermost (&DR_INNERMOST (dr
), memref
,
1254 nest
!= NULL
? loop
: NULL
, stmt
);
1255 dr_analyze_indices (dr
, nest
, loop
);
1256 dr_analyze_alias (dr
);
1258 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1261 fprintf (dump_file
, "\tbase_address: ");
1262 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1263 fprintf (dump_file
, "\n\toffset from base address: ");
1264 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1265 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1266 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1267 fprintf (dump_file
, "\n\tstep: ");
1268 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1269 fprintf (dump_file
, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr
));
1270 fprintf (dump_file
, "\n\tbase misalignment: %d",
1271 DR_BASE_MISALIGNMENT (dr
));
1272 fprintf (dump_file
, "\n\toffset alignment: %d",
1273 DR_OFFSET_ALIGNMENT (dr
));
1274 fprintf (dump_file
, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr
));
1275 fprintf (dump_file
, "\n\tbase_object: ");
1276 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1277 fprintf (dump_file
, "\n");
1278 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1280 fprintf (dump_file
, "\tAccess function %d: ", i
);
1281 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1288 /* A helper function computes order between two tree expressions T1 and T2.
1289 This is used in comparator functions sorting objects based on the order
1290 of tree expressions. The function returns -1, 0, or 1. */
1293 data_ref_compare_tree (tree t1
, tree t2
)
1296 enum tree_code code
;
1306 STRIP_USELESS_TYPE_CONVERSION (t1
);
1307 STRIP_USELESS_TYPE_CONVERSION (t2
);
1311 if (TREE_CODE (t1
) != TREE_CODE (t2
)
1312 && ! (CONVERT_EXPR_P (t1
) && CONVERT_EXPR_P (t2
)))
1313 return TREE_CODE (t1
) < TREE_CODE (t2
) ? -1 : 1;
1315 code
= TREE_CODE (t1
);
1319 return tree_int_cst_compare (t1
, t2
);
1322 if (TREE_STRING_LENGTH (t1
) != TREE_STRING_LENGTH (t2
))
1323 return TREE_STRING_LENGTH (t1
) < TREE_STRING_LENGTH (t2
) ? -1 : 1;
1324 return memcmp (TREE_STRING_POINTER (t1
), TREE_STRING_POINTER (t2
),
1325 TREE_STRING_LENGTH (t1
));
1328 if (SSA_NAME_VERSION (t1
) != SSA_NAME_VERSION (t2
))
1329 return SSA_NAME_VERSION (t1
) < SSA_NAME_VERSION (t2
) ? -1 : 1;
1333 if (POLY_INT_CST_P (t1
))
1334 return compare_sizes_for_sort (wi::to_poly_widest (t1
),
1335 wi::to_poly_widest (t2
));
1337 tclass
= TREE_CODE_CLASS (code
);
1339 /* For decls, compare their UIDs. */
1340 if (tclass
== tcc_declaration
)
1342 if (DECL_UID (t1
) != DECL_UID (t2
))
1343 return DECL_UID (t1
) < DECL_UID (t2
) ? -1 : 1;
1346 /* For expressions, compare their operands recursively. */
1347 else if (IS_EXPR_CODE_CLASS (tclass
))
1349 for (i
= TREE_OPERAND_LENGTH (t1
) - 1; i
>= 0; --i
)
1351 cmp
= data_ref_compare_tree (TREE_OPERAND (t1
, i
),
1352 TREE_OPERAND (t2
, i
));
1364 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1368 runtime_alias_check_p (ddr_p ddr
, class loop
*loop
, bool speed_p
)
1370 if (dump_enabled_p ())
1371 dump_printf (MSG_NOTE
,
1372 "consider run-time aliasing test between %T and %T\n",
1373 DR_REF (DDR_A (ddr
)), DR_REF (DDR_B (ddr
)));
1376 return opt_result::failure_at (DR_STMT (DDR_A (ddr
)),
1377 "runtime alias check not supported when"
1378 " optimizing for size.\n");
1380 /* FORNOW: We don't support versioning with outer-loop in either
1381 vectorization or loop distribution. */
1382 if (loop
!= NULL
&& loop
->inner
!= NULL
)
1383 return opt_result::failure_at (DR_STMT (DDR_A (ddr
)),
1384 "runtime alias check not supported for"
1387 return opt_result::success ();
1390 /* Operator == between two dr_with_seg_len objects.
1392 This equality operator is used to make sure two data refs
1393 are the same one so that we will consider to combine the
1394 aliasing checks of those two pairs of data dependent data
1398 operator == (const dr_with_seg_len
& d1
,
1399 const dr_with_seg_len
& d2
)
1401 return (operand_equal_p (DR_BASE_ADDRESS (d1
.dr
),
1402 DR_BASE_ADDRESS (d2
.dr
), 0)
1403 && data_ref_compare_tree (DR_OFFSET (d1
.dr
), DR_OFFSET (d2
.dr
)) == 0
1404 && data_ref_compare_tree (DR_INIT (d1
.dr
), DR_INIT (d2
.dr
)) == 0
1405 && data_ref_compare_tree (d1
.seg_len
, d2
.seg_len
) == 0
1406 && known_eq (d1
.access_size
, d2
.access_size
)
1407 && d1
.align
== d2
.align
);
1410 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1411 so that we can combine aliasing checks in one scan. */
1414 comp_dr_with_seg_len_pair (const void *pa_
, const void *pb_
)
1416 const dr_with_seg_len_pair_t
* pa
= (const dr_with_seg_len_pair_t
*) pa_
;
1417 const dr_with_seg_len_pair_t
* pb
= (const dr_with_seg_len_pair_t
*) pb_
;
1418 const dr_with_seg_len
&a1
= pa
->first
, &a2
= pa
->second
;
1419 const dr_with_seg_len
&b1
= pb
->first
, &b2
= pb
->second
;
1421 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1422 if a and c have the same basic address snd step, and b and d have the same
1423 address and step. Therefore, if any a&c or b&d don't have the same address
1424 and step, we don't care the order of those two pairs after sorting. */
1427 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a1
.dr
),
1428 DR_BASE_ADDRESS (b1
.dr
))) != 0)
1430 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a2
.dr
),
1431 DR_BASE_ADDRESS (b2
.dr
))) != 0)
1433 if ((comp_res
= data_ref_compare_tree (DR_STEP (a1
.dr
),
1434 DR_STEP (b1
.dr
))) != 0)
1436 if ((comp_res
= data_ref_compare_tree (DR_STEP (a2
.dr
),
1437 DR_STEP (b2
.dr
))) != 0)
1439 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a1
.dr
),
1440 DR_OFFSET (b1
.dr
))) != 0)
1442 if ((comp_res
= data_ref_compare_tree (DR_INIT (a1
.dr
),
1443 DR_INIT (b1
.dr
))) != 0)
1445 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a2
.dr
),
1446 DR_OFFSET (b2
.dr
))) != 0)
1448 if ((comp_res
= data_ref_compare_tree (DR_INIT (a2
.dr
),
1449 DR_INIT (b2
.dr
))) != 0)
1455 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1458 dump_alias_pair (dr_with_seg_len_pair_t
*alias_pair
, const char *indent
)
1460 dump_printf (MSG_NOTE
, "%sreference: %T vs. %T\n", indent
,
1461 DR_REF (alias_pair
->first
.dr
),
1462 DR_REF (alias_pair
->second
.dr
));
1464 dump_printf (MSG_NOTE
, "%ssegment length: %T", indent
,
1465 alias_pair
->first
.seg_len
);
1466 if (!operand_equal_p (alias_pair
->first
.seg_len
,
1467 alias_pair
->second
.seg_len
, 0))
1468 dump_printf (MSG_NOTE
, " vs. %T", alias_pair
->second
.seg_len
);
1470 dump_printf (MSG_NOTE
, "\n%saccess size: ", indent
);
1471 dump_dec (MSG_NOTE
, alias_pair
->first
.access_size
);
1472 if (maybe_ne (alias_pair
->first
.access_size
, alias_pair
->second
.access_size
))
1474 dump_printf (MSG_NOTE
, " vs. ");
1475 dump_dec (MSG_NOTE
, alias_pair
->second
.access_size
);
1478 dump_printf (MSG_NOTE
, "\n%salignment: %d", indent
,
1479 alias_pair
->first
.align
);
1480 if (alias_pair
->first
.align
!= alias_pair
->second
.align
)
1481 dump_printf (MSG_NOTE
, " vs. %d", alias_pair
->second
.align
);
1483 dump_printf (MSG_NOTE
, "\n%sflags: ", indent
);
1484 if (alias_pair
->flags
& DR_ALIAS_RAW
)
1485 dump_printf (MSG_NOTE
, " RAW");
1486 if (alias_pair
->flags
& DR_ALIAS_WAR
)
1487 dump_printf (MSG_NOTE
, " WAR");
1488 if (alias_pair
->flags
& DR_ALIAS_WAW
)
1489 dump_printf (MSG_NOTE
, " WAW");
1490 if (alias_pair
->flags
& DR_ALIAS_ARBITRARY
)
1491 dump_printf (MSG_NOTE
, " ARBITRARY");
1492 if (alias_pair
->flags
& DR_ALIAS_SWAPPED
)
1493 dump_printf (MSG_NOTE
, " SWAPPED");
1494 if (alias_pair
->flags
& DR_ALIAS_UNSWAPPED
)
1495 dump_printf (MSG_NOTE
, " UNSWAPPED");
1496 if (alias_pair
->flags
& DR_ALIAS_MIXED_STEPS
)
1497 dump_printf (MSG_NOTE
, " MIXED_STEPS");
1498 if (alias_pair
->flags
== 0)
1499 dump_printf (MSG_NOTE
, " <none>");
1500 dump_printf (MSG_NOTE
, "\n");
1503 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1504 FACTOR is number of iterations that each data reference is accessed.
1506 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1507 we create an expression:
1509 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1510 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1512 for aliasing checks. However, in some cases we can decrease the number
1513 of checks by combining two checks into one. For example, suppose we have
1514 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1515 condition is satisfied:
1517 load_ptr_0 < load_ptr_1 &&
1518 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1520 (this condition means, in each iteration of vectorized loop, the accessed
1521 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1524 we then can use only the following expression to finish the alising checks
1525 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1527 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1528 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1530 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1534 prune_runtime_alias_test_list (vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
1537 /* Canonicalize each pair so that the base components are ordered wrt
1538 data_ref_compare_tree. This allows the loop below to merge more
1541 dr_with_seg_len_pair_t
*alias_pair
;
1542 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
1544 data_reference_p dr_a
= alias_pair
->first
.dr
;
1545 data_reference_p dr_b
= alias_pair
->second
.dr
;
1546 int comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (dr_a
),
1547 DR_BASE_ADDRESS (dr_b
));
1549 comp_res
= data_ref_compare_tree (DR_OFFSET (dr_a
), DR_OFFSET (dr_b
));
1551 comp_res
= data_ref_compare_tree (DR_INIT (dr_a
), DR_INIT (dr_b
));
1554 std::swap (alias_pair
->first
, alias_pair
->second
);
1555 alias_pair
->flags
|= DR_ALIAS_SWAPPED
;
1558 alias_pair
->flags
|= DR_ALIAS_UNSWAPPED
;
1561 /* Sort the collected data ref pairs so that we can scan them once to
1562 combine all possible aliasing checks. */
1563 alias_pairs
->qsort (comp_dr_with_seg_len_pair
);
1565 /* Scan the sorted dr pairs and check if we can combine alias checks
1566 of two neighboring dr pairs. */
1567 for (i
= 1; i
< alias_pairs
->length (); ++i
)
1569 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1570 dr_with_seg_len_pair_t
*alias_pair1
= &(*alias_pairs
)[i
- 1];
1571 dr_with_seg_len_pair_t
*alias_pair2
= &(*alias_pairs
)[i
];
1573 dr_with_seg_len
*dr_a1
= &alias_pair1
->first
;
1574 dr_with_seg_len
*dr_b1
= &alias_pair1
->second
;
1575 dr_with_seg_len
*dr_a2
= &alias_pair2
->first
;
1576 dr_with_seg_len
*dr_b2
= &alias_pair2
->second
;
1578 /* Remove duplicate data ref pairs. */
1579 if (*dr_a1
== *dr_a2
&& *dr_b1
== *dr_b2
)
1581 if (dump_enabled_p ())
1582 dump_printf (MSG_NOTE
, "found equal ranges %T, %T and %T, %T\n",
1583 DR_REF (dr_a1
->dr
), DR_REF (dr_b1
->dr
),
1584 DR_REF (dr_a2
->dr
), DR_REF (dr_b2
->dr
));
1585 alias_pair1
->flags
|= alias_pair2
->flags
;
1586 alias_pairs
->ordered_remove (i
--);
1590 if (*dr_a1
== *dr_a2
|| *dr_b1
== *dr_b2
)
1592 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1593 and DR_A1 and DR_A2 are two consecutive memrefs. */
1594 if (*dr_a1
== *dr_a2
)
1596 std::swap (dr_a1
, dr_b1
);
1597 std::swap (dr_a2
, dr_b2
);
1600 poly_int64 init_a1
, init_a2
;
1601 /* Only consider cases in which the distance between the initial
1602 DR_A1 and the initial DR_A2 is known at compile time. */
1603 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1
->dr
),
1604 DR_BASE_ADDRESS (dr_a2
->dr
), 0)
1605 || !operand_equal_p (DR_OFFSET (dr_a1
->dr
),
1606 DR_OFFSET (dr_a2
->dr
), 0)
1607 || !poly_int_tree_p (DR_INIT (dr_a1
->dr
), &init_a1
)
1608 || !poly_int_tree_p (DR_INIT (dr_a2
->dr
), &init_a2
))
1611 /* Don't combine if we can't tell which one comes first. */
1612 if (!ordered_p (init_a1
, init_a2
))
1615 /* Work out what the segment length would be if we did combine
1618 - If DR_A1 and DR_A2 have equal lengths, that length is
1619 also the combined length.
1621 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1622 length is the lower bound on those lengths.
1624 - If DR_A1 and DR_A2 both have positive lengths, the combined
1625 length is the upper bound on those lengths.
1627 Other cases are unlikely to give a useful combination.
1629 The lengths both have sizetype, so the sign is taken from
1630 the step instead. */
1631 poly_uint64 new_seg_len
= 0;
1632 bool new_seg_len_p
= !operand_equal_p (dr_a1
->seg_len
,
1636 poly_uint64 seg_len_a1
, seg_len_a2
;
1637 if (!poly_int_tree_p (dr_a1
->seg_len
, &seg_len_a1
)
1638 || !poly_int_tree_p (dr_a2
->seg_len
, &seg_len_a2
))
1641 tree indicator_a
= dr_direction_indicator (dr_a1
->dr
);
1642 if (TREE_CODE (indicator_a
) != INTEGER_CST
)
1645 tree indicator_b
= dr_direction_indicator (dr_a2
->dr
);
1646 if (TREE_CODE (indicator_b
) != INTEGER_CST
)
1649 int sign_a
= tree_int_cst_sgn (indicator_a
);
1650 int sign_b
= tree_int_cst_sgn (indicator_b
);
1652 if (sign_a
<= 0 && sign_b
<= 0)
1653 new_seg_len
= lower_bound (seg_len_a1
, seg_len_a2
);
1654 else if (sign_a
>= 0 && sign_b
>= 0)
1655 new_seg_len
= upper_bound (seg_len_a1
, seg_len_a2
);
1659 /* At this point we're committed to merging the refs. */
1661 /* Make sure dr_a1 starts left of dr_a2. */
1662 if (maybe_gt (init_a1
, init_a2
))
1664 std::swap (*dr_a1
, *dr_a2
);
1665 std::swap (init_a1
, init_a2
);
1668 /* The DR_Bs are equal, so only the DR_As can introduce
1670 if (!operand_equal_p (DR_STEP (dr_a1
->dr
), DR_STEP (dr_a2
->dr
), 0))
1671 alias_pair1
->flags
|= DR_ALIAS_MIXED_STEPS
;
1675 dr_a1
->seg_len
= build_int_cst (TREE_TYPE (dr_a1
->seg_len
),
1677 dr_a1
->align
= MIN (dr_a1
->align
, known_alignment (new_seg_len
));
1680 /* This is always positive due to the swap above. */
1681 poly_uint64 diff
= init_a2
- init_a1
;
1683 /* The new check will start at DR_A1. Make sure that its access
1684 size encompasses the initial DR_A2. */
1685 if (maybe_lt (dr_a1
->access_size
, diff
+ dr_a2
->access_size
))
1687 dr_a1
->access_size
= upper_bound (dr_a1
->access_size
,
1688 diff
+ dr_a2
->access_size
);
1689 unsigned int new_align
= known_alignment (dr_a1
->access_size
);
1690 dr_a1
->align
= MIN (dr_a1
->align
, new_align
);
1692 if (dump_enabled_p ())
1693 dump_printf (MSG_NOTE
, "merging ranges for %T, %T and %T, %T\n",
1694 DR_REF (dr_a1
->dr
), DR_REF (dr_b1
->dr
),
1695 DR_REF (dr_a2
->dr
), DR_REF (dr_b2
->dr
));
1696 alias_pair1
->flags
|= alias_pair2
->flags
;
1697 alias_pairs
->ordered_remove (i
);
1702 /* Try to restore the original dr_with_seg_len order within each
1703 dr_with_seg_len_pair_t. If we ended up combining swapped and
1704 unswapped pairs into the same check, we have to invalidate any
1705 RAW, WAR and WAW information for it. */
1706 if (dump_enabled_p ())
1707 dump_printf (MSG_NOTE
, "merged alias checks:\n");
1708 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
1710 unsigned int swap_mask
= (DR_ALIAS_SWAPPED
| DR_ALIAS_UNSWAPPED
);
1711 unsigned int swapped
= (alias_pair
->flags
& swap_mask
);
1712 if (swapped
== DR_ALIAS_SWAPPED
)
1713 std::swap (alias_pair
->first
, alias_pair
->second
);
1714 else if (swapped
!= DR_ALIAS_UNSWAPPED
)
1715 alias_pair
->flags
|= DR_ALIAS_ARBITRARY
;
1716 alias_pair
->flags
&= ~swap_mask
;
1717 if (dump_enabled_p ())
1718 dump_alias_pair (alias_pair
, " ");
1722 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1723 free of aliases, using a condition based on index values instead
1724 of a condition based on addresses. Return true on success,
1725 storing the condition in *COND_EXPR.
1727 This can only be done if the two data references in ALIAS_PAIR access
1728 the same array object and the index is the only difference. For example,
1729 if the two data references are DR_A and DR_B:
1732 data-ref arr[i] arr[j]
1734 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1736 The addresses and their index are like:
1738 |<- ADDR_A ->| |<- ADDR_B ->|
1739 ------------------------------------------------------->
1741 ------------------------------------------------------->
1742 i_0 ... i_0+4 j_0 ... j_0+4
1744 We can create expression based on index rather than address:
1746 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1748 Note evolution step of index needs to be considered in comparison. */
1751 create_intersect_range_checks_index (class loop
*loop
, tree
*cond_expr
,
1752 const dr_with_seg_len_pair_t
&alias_pair
)
1754 const dr_with_seg_len
&dr_a
= alias_pair
.first
;
1755 const dr_with_seg_len
&dr_b
= alias_pair
.second
;
1756 if ((alias_pair
.flags
& DR_ALIAS_MIXED_STEPS
)
1757 || integer_zerop (DR_STEP (dr_a
.dr
))
1758 || integer_zerop (DR_STEP (dr_b
.dr
))
1759 || DR_NUM_DIMENSIONS (dr_a
.dr
) != DR_NUM_DIMENSIONS (dr_b
.dr
))
1762 poly_uint64 seg_len1
, seg_len2
;
1763 if (!poly_int_tree_p (dr_a
.seg_len
, &seg_len1
)
1764 || !poly_int_tree_p (dr_b
.seg_len
, &seg_len2
))
1767 if (!tree_fits_shwi_p (DR_STEP (dr_a
.dr
)))
1770 if (!operand_equal_p (DR_BASE_OBJECT (dr_a
.dr
), DR_BASE_OBJECT (dr_b
.dr
), 0))
1773 if (!operand_equal_p (DR_STEP (dr_a
.dr
), DR_STEP (dr_b
.dr
), 0))
1776 gcc_assert (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
);
1778 bool neg_step
= tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0;
1779 unsigned HOST_WIDE_INT abs_step
= tree_to_shwi (DR_STEP (dr_a
.dr
));
1782 abs_step
= -abs_step
;
1783 seg_len1
= -seg_len1
;
1784 seg_len2
= -seg_len2
;
1788 /* Include the access size in the length, so that we only have one
1789 tree addition below. */
1790 seg_len1
+= dr_a
.access_size
;
1791 seg_len2
+= dr_b
.access_size
;
1794 /* Infer the number of iterations with which the memory segment is accessed
1795 by DR. In other words, alias is checked if memory segment accessed by
1796 DR_A in some iterations intersect with memory segment accessed by DR_B
1797 in the same amount iterations.
1798 Note segnment length is a linear function of number of iterations with
1799 DR_STEP as the coefficient. */
1800 poly_uint64 niter_len1
, niter_len2
;
1801 if (!can_div_trunc_p (seg_len1
+ abs_step
- 1, abs_step
, &niter_len1
)
1802 || !can_div_trunc_p (seg_len2
+ abs_step
- 1, abs_step
, &niter_len2
))
1805 poly_uint64 niter_access1
= 0, niter_access2
= 0;
1808 /* Divide each access size by the byte step, rounding up. */
1809 if (!can_div_trunc_p (dr_a
.access_size
- abs_step
- 1,
1810 abs_step
, &niter_access1
)
1811 || !can_div_trunc_p (dr_b
.access_size
+ abs_step
- 1,
1812 abs_step
, &niter_access2
))
1817 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr_a
.dr
); i
++)
1819 tree access1
= DR_ACCESS_FN (dr_a
.dr
, i
);
1820 tree access2
= DR_ACCESS_FN (dr_b
.dr
, i
);
1821 /* Two indices must be the same if they are not scev, or not scev wrto
1822 current loop being vecorized. */
1823 if (TREE_CODE (access1
) != POLYNOMIAL_CHREC
1824 || TREE_CODE (access2
) != POLYNOMIAL_CHREC
1825 || CHREC_VARIABLE (access1
) != (unsigned)loop
->num
1826 || CHREC_VARIABLE (access2
) != (unsigned)loop
->num
)
1828 if (operand_equal_p (access1
, access2
, 0))
1833 /* The two indices must have the same step. */
1834 if (!operand_equal_p (CHREC_RIGHT (access1
), CHREC_RIGHT (access2
), 0))
1837 tree idx_step
= CHREC_RIGHT (access1
);
1838 /* Index must have const step, otherwise DR_STEP won't be constant. */
1839 gcc_assert (TREE_CODE (idx_step
) == INTEGER_CST
);
1840 /* Index must evaluate in the same direction as DR. */
1841 gcc_assert (!neg_step
|| tree_int_cst_sign_bit (idx_step
) == 1);
1843 tree min1
= CHREC_LEFT (access1
);
1844 tree min2
= CHREC_LEFT (access2
);
1845 if (!types_compatible_p (TREE_TYPE (min1
), TREE_TYPE (min2
)))
1848 /* Ideally, alias can be checked against loop's control IV, but we
1849 need to prove linear mapping between control IV and reference
1850 index. Although that should be true, we check against (array)
1851 index of data reference. Like segment length, index length is
1852 linear function of the number of iterations with index_step as
1853 the coefficient, i.e, niter_len * idx_step. */
1854 tree idx_len1
= fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
1855 build_int_cst (TREE_TYPE (min1
),
1857 tree idx_len2
= fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
1858 build_int_cst (TREE_TYPE (min2
),
1860 tree max1
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min1
), min1
, idx_len1
);
1861 tree max2
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min2
), min2
, idx_len2
);
1862 /* Adjust ranges for negative step. */
1865 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1866 std::swap (min1
, max1
);
1867 std::swap (min2
, max2
);
1869 /* As with the lengths just calculated, we've measured the access
1870 sizes in iterations, so multiply them by the index step. */
1872 = fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
1873 build_int_cst (TREE_TYPE (min1
), niter_access1
));
1875 = fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
1876 build_int_cst (TREE_TYPE (min2
), niter_access2
));
1878 /* MINUS_EXPR because the above values are negative. */
1879 max1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (max1
), max1
, idx_access1
);
1880 max2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (max2
), max2
, idx_access2
);
1883 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1884 fold_build2 (LE_EXPR
, boolean_type_node
, max1
, min2
),
1885 fold_build2 (LE_EXPR
, boolean_type_node
, max2
, min1
));
1887 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1888 *cond_expr
, part_cond_expr
);
1890 *cond_expr
= part_cond_expr
;
1892 if (dump_enabled_p ())
1893 dump_printf (MSG_NOTE
, "using an index-based overlap test\n");
1897 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1898 every address ADDR accessed by D:
1900 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1902 In this case, every element accessed by D is aligned to at least
1905 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1907 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1910 get_segment_min_max (const dr_with_seg_len
&d
, tree
*seg_min_out
,
1911 tree
*seg_max_out
, HOST_WIDE_INT align
)
1913 /* Each access has the following pattern:
1916 <--- A: -ve step --->
1917 +-----+-------+-----+-------+-----+
1918 | n-1 | ,.... | 0 | ..... | n-1 |
1919 +-----+-------+-----+-------+-----+
1920 <--- B: +ve step --->
1925 where "n" is the number of scalar iterations covered by the segment.
1926 (This should be VF for a particular pair if we know that both steps
1927 are the same, otherwise it will be the full number of scalar loop
1930 A is the range of bytes accessed when the step is negative,
1931 B is the range when the step is positive.
1933 If the access size is "access_size" bytes, the lowest addressed byte is:
1935 base + (step < 0 ? seg_len : 0) [LB]
1937 and the highest addressed byte is always below:
1939 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1945 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1948 LB <= ADDR <= UB - ALIGN
1950 where "- ALIGN" folds naturally with the "+ access_size" and often
1953 We don't try to simplify LB and UB beyond this (e.g. by using
1954 MIN and MAX based on whether seg_len rather than the stride is
1955 negative) because it is possible for the absolute size of the
1956 segment to overflow the range of a ssize_t.
1958 Keeping the pointer_plus outside of the cond_expr should allow
1959 the cond_exprs to be shared with other alias checks. */
1960 tree indicator
= dr_direction_indicator (d
.dr
);
1961 tree neg_step
= fold_build2 (LT_EXPR
, boolean_type_node
,
1962 fold_convert (ssizetype
, indicator
),
1964 tree addr_base
= fold_build_pointer_plus (DR_BASE_ADDRESS (d
.dr
),
1966 addr_base
= fold_build_pointer_plus (addr_base
, DR_INIT (d
.dr
));
1968 = fold_convert (sizetype
, rewrite_to_non_trapping_overflow (d
.seg_len
));
1970 tree min_reach
= fold_build3 (COND_EXPR
, sizetype
, neg_step
,
1971 seg_len
, size_zero_node
);
1972 tree max_reach
= fold_build3 (COND_EXPR
, sizetype
, neg_step
,
1973 size_zero_node
, seg_len
);
1974 max_reach
= fold_build2 (PLUS_EXPR
, sizetype
, max_reach
,
1975 size_int (d
.access_size
- align
));
1977 *seg_min_out
= fold_build_pointer_plus (addr_base
, min_reach
);
1978 *seg_max_out
= fold_build_pointer_plus (addr_base
, max_reach
);
1981 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
1982 storing the condition in *COND_EXPR. The fallback is to generate a
1983 a test that the two accesses do not overlap:
1985 end_a <= start_b || end_b <= start_a. */
1988 create_intersect_range_checks (class loop
*loop
, tree
*cond_expr
,
1989 const dr_with_seg_len_pair_t
&alias_pair
)
1991 const dr_with_seg_len
& dr_a
= alias_pair
.first
;
1992 const dr_with_seg_len
& dr_b
= alias_pair
.second
;
1993 *cond_expr
= NULL_TREE
;
1994 if (create_intersect_range_checks_index (loop
, cond_expr
, alias_pair
))
1997 unsigned HOST_WIDE_INT min_align
;
1999 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2000 are equivalent. This is just an optimization heuristic. */
2001 if (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
2002 && TREE_CODE (DR_STEP (dr_b
.dr
)) == INTEGER_CST
)
2004 /* In this case adding access_size to seg_len is likely to give
2005 a simple X * step, where X is either the number of scalar
2006 iterations or the vectorization factor. We're better off
2007 keeping that, rather than subtracting an alignment from it.
2009 In this case the maximum values are exclusive and so there is
2010 no alias if the maximum of one segment equals the minimum
2017 /* Calculate the minimum alignment shared by all four pointers,
2018 then arrange for this alignment to be subtracted from the
2019 exclusive maximum values to get inclusive maximum values.
2020 This "- min_align" is cumulative with a "+ access_size"
2021 in the calculation of the maximum values. In the best
2022 (and common) case, the two cancel each other out, leaving
2023 us with an inclusive bound based only on seg_len. In the
2024 worst case we're simply adding a smaller number than before.
2026 Because the maximum values are inclusive, there is an alias
2027 if the maximum value of one segment is equal to the minimum
2028 value of the other. */
2029 min_align
= MIN (dr_a
.align
, dr_b
.align
);
2033 tree seg_a_min
, seg_a_max
, seg_b_min
, seg_b_max
;
2034 get_segment_min_max (dr_a
, &seg_a_min
, &seg_a_max
, min_align
);
2035 get_segment_min_max (dr_b
, &seg_b_min
, &seg_b_max
, min_align
);
2038 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
2039 fold_build2 (cmp_code
, boolean_type_node
, seg_a_max
, seg_b_min
),
2040 fold_build2 (cmp_code
, boolean_type_node
, seg_b_max
, seg_a_min
));
2041 if (dump_enabled_p ())
2042 dump_printf (MSG_NOTE
, "using an address-based overlap test\n");
2045 /* Create a conditional expression that represents the run-time checks for
2046 overlapping of address ranges represented by a list of data references
2047 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2048 COND_EXPR is the conditional expression to be used in the if statement
2049 that controls which version of the loop gets executed at runtime. */
2052 create_runtime_alias_checks (class loop
*loop
,
2053 vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
2056 tree part_cond_expr
;
2058 fold_defer_overflow_warnings ();
2059 dr_with_seg_len_pair_t
*alias_pair
;
2061 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
2063 gcc_assert (alias_pair
->flags
);
2064 if (dump_enabled_p ())
2065 dump_printf (MSG_NOTE
,
2066 "create runtime check for data references %T and %T\n",
2067 DR_REF (alias_pair
->first
.dr
),
2068 DR_REF (alias_pair
->second
.dr
));
2070 /* Create condition expression for each pair data references. */
2071 create_intersect_range_checks (loop
, &part_cond_expr
, *alias_pair
);
2073 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2074 *cond_expr
, part_cond_expr
);
2076 *cond_expr
= part_cond_expr
;
2078 fold_undefer_and_ignore_overflow_warnings ();
2081 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2084 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
2088 STRIP_NOPS (offset1
);
2089 STRIP_NOPS (offset2
);
2091 if (offset1
== offset2
)
2094 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
2095 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
2098 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
2099 TREE_OPERAND (offset2
, 0));
2101 if (!res
|| !BINARY_CLASS_P (offset1
))
2104 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
2105 TREE_OPERAND (offset2
, 1));
2110 /* Check if DRA and DRB have equal offsets. */
2112 dr_equal_offsets_p (struct data_reference
*dra
,
2113 struct data_reference
*drb
)
2115 tree offset1
, offset2
;
2117 offset1
= DR_OFFSET (dra
);
2118 offset2
= DR_OFFSET (drb
);
2120 return dr_equal_offsets_p1 (offset1
, offset2
);
2123 /* Returns true if FNA == FNB. */
2126 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
2128 unsigned i
, n
= fna
.length ();
2130 if (n
!= fnb
.length ())
2133 for (i
= 0; i
< n
; i
++)
2134 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
2140 /* If all the functions in CF are the same, returns one of them,
2141 otherwise returns NULL. */
2144 common_affine_function (conflict_function
*cf
)
2149 if (!CF_NONTRIVIAL_P (cf
))
2150 return affine_fn ();
2154 for (i
= 1; i
< cf
->n
; i
++)
2155 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
2156 return affine_fn ();
2161 /* Returns the base of the affine function FN. */
2164 affine_function_base (affine_fn fn
)
2169 /* Returns true if FN is a constant. */
2172 affine_function_constant_p (affine_fn fn
)
2177 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
2178 if (!integer_zerop (coef
))
2184 /* Returns true if FN is the zero constant function. */
2187 affine_function_zero_p (affine_fn fn
)
2189 return (integer_zerop (affine_function_base (fn
))
2190 && affine_function_constant_p (fn
));
2193 /* Returns a signed integer type with the largest precision from TA
2197 signed_type_for_types (tree ta
, tree tb
)
2199 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
2200 return signed_type_for (ta
);
2202 return signed_type_for (tb
);
2205 /* Applies operation OP on affine functions FNA and FNB, and returns the
2209 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
2215 if (fnb
.length () > fna
.length ())
2227 for (i
= 0; i
< n
; i
++)
2229 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
2230 TREE_TYPE (fnb
[i
]));
2231 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
2234 for (; fna
.iterate (i
, &coef
); i
++)
2235 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
2236 coef
, integer_zero_node
));
2237 for (; fnb
.iterate (i
, &coef
); i
++)
2238 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
2239 integer_zero_node
, coef
));
2244 /* Returns the sum of affine functions FNA and FNB. */
2247 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
2249 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
2252 /* Returns the difference of affine functions FNA and FNB. */
2255 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
2257 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
2260 /* Frees affine function FN. */
2263 affine_fn_free (affine_fn fn
)
2268 /* Determine for each subscript in the data dependence relation DDR
2272 compute_subscript_distance (struct data_dependence_relation
*ddr
)
2274 conflict_function
*cf_a
, *cf_b
;
2275 affine_fn fn_a
, fn_b
, diff
;
2277 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
2281 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2283 struct subscript
*subscript
;
2285 subscript
= DDR_SUBSCRIPT (ddr
, i
);
2286 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
2287 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
2289 fn_a
= common_affine_function (cf_a
);
2290 fn_b
= common_affine_function (cf_b
);
2291 if (!fn_a
.exists () || !fn_b
.exists ())
2293 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2296 diff
= affine_fn_minus (fn_a
, fn_b
);
2298 if (affine_function_constant_p (diff
))
2299 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
2301 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2303 affine_fn_free (diff
);
2308 /* Returns the conflict function for "unknown". */
2310 static conflict_function
*
2311 conflict_fn_not_known (void)
2313 conflict_function
*fn
= XCNEW (conflict_function
);
2319 /* Returns the conflict function for "independent". */
2321 static conflict_function
*
2322 conflict_fn_no_dependence (void)
2324 conflict_function
*fn
= XCNEW (conflict_function
);
2325 fn
->n
= NO_DEPENDENCE
;
2330 /* Returns true if the address of OBJ is invariant in LOOP. */
2333 object_address_invariant_in_loop_p (const class loop
*loop
, const_tree obj
)
2335 while (handled_component_p (obj
))
2337 if (TREE_CODE (obj
) == ARRAY_REF
)
2339 for (int i
= 1; i
< 4; ++i
)
2340 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, i
),
2344 else if (TREE_CODE (obj
) == COMPONENT_REF
)
2346 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
2350 obj
= TREE_OPERAND (obj
, 0);
2353 if (!INDIRECT_REF_P (obj
)
2354 && TREE_CODE (obj
) != MEM_REF
)
2357 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
2361 /* Returns false if we can prove that data references A and B do not alias,
2362 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2366 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
2367 class loop
*loop_nest
)
2369 tree addr_a
= DR_BASE_OBJECT (a
);
2370 tree addr_b
= DR_BASE_OBJECT (b
);
2372 /* If we are not processing a loop nest but scalar code we
2373 do not need to care about possible cross-iteration dependences
2374 and thus can process the full original reference. Do so,
2375 similar to how loop invariant motion applies extra offset-based
2379 aff_tree off1
, off2
;
2380 poly_widest_int size1
, size2
;
2381 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
2382 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
2383 aff_combination_scale (&off1
, -1);
2384 aff_combination_add (&off2
, &off1
);
2385 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
2389 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
2390 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
2391 /* For cross-iteration dependences the cliques must be valid for the
2392 whole loop, not just individual iterations. */
2394 || MR_DEPENDENCE_CLIQUE (addr_a
) == 1
2395 || MR_DEPENDENCE_CLIQUE (addr_a
) == loop_nest
->owned_clique
)
2396 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
2397 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
2400 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2401 do not know the size of the base-object. So we cannot do any
2402 offset/overlap based analysis but have to rely on points-to
2403 information only. */
2404 if (TREE_CODE (addr_a
) == MEM_REF
2405 && (DR_UNCONSTRAINED_BASE (a
)
2406 || TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
))
2408 /* For true dependences we can apply TBAA. */
2409 if (flag_strict_aliasing
2410 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2411 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2412 get_alias_set (DR_REF (b
))))
2414 if (TREE_CODE (addr_b
) == MEM_REF
)
2415 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2416 TREE_OPERAND (addr_b
, 0));
2418 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2419 build_fold_addr_expr (addr_b
));
2421 else if (TREE_CODE (addr_b
) == MEM_REF
2422 && (DR_UNCONSTRAINED_BASE (b
)
2423 || TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
))
2425 /* For true dependences we can apply TBAA. */
2426 if (flag_strict_aliasing
2427 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2428 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2429 get_alias_set (DR_REF (b
))))
2431 if (TREE_CODE (addr_a
) == MEM_REF
)
2432 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2433 TREE_OPERAND (addr_b
, 0));
2435 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
2436 TREE_OPERAND (addr_b
, 0));
2439 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2440 that is being subsetted in the loop nest. */
2441 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
2442 return refs_output_dependent_p (addr_a
, addr_b
);
2443 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
2444 return refs_anti_dependent_p (addr_a
, addr_b
);
2445 return refs_may_alias_p (addr_a
, addr_b
);
2448 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2449 if it is meaningful to compare their associated access functions
2450 when checking for dependencies. */
2453 access_fn_components_comparable_p (tree ref_a
, tree ref_b
)
2455 /* Allow pairs of component refs from the following sets:
2457 { REALPART_EXPR, IMAGPART_EXPR }
2460 tree_code code_a
= TREE_CODE (ref_a
);
2461 tree_code code_b
= TREE_CODE (ref_b
);
2462 if (code_a
== IMAGPART_EXPR
)
2463 code_a
= REALPART_EXPR
;
2464 if (code_b
== IMAGPART_EXPR
)
2465 code_b
= REALPART_EXPR
;
2466 if (code_a
!= code_b
)
2469 if (TREE_CODE (ref_a
) == COMPONENT_REF
)
2470 /* ??? We cannot simply use the type of operand #0 of the refs here as
2471 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2472 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2473 return (DECL_CONTEXT (TREE_OPERAND (ref_a
, 1))
2474 == DECL_CONTEXT (TREE_OPERAND (ref_b
, 1)));
2476 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a
, 0)),
2477 TREE_TYPE (TREE_OPERAND (ref_b
, 0)));
2480 /* Initialize a data dependence relation between data accesses A and
2481 B. NB_LOOPS is the number of loops surrounding the references: the
2482 size of the classic distance/direction vectors. */
2484 struct data_dependence_relation
*
2485 initialize_data_dependence_relation (struct data_reference
*a
,
2486 struct data_reference
*b
,
2487 vec
<loop_p
> loop_nest
)
2489 struct data_dependence_relation
*res
;
2492 res
= XCNEW (struct data_dependence_relation
);
2495 DDR_LOOP_NEST (res
).create (0);
2496 DDR_SUBSCRIPTS (res
).create (0);
2497 DDR_DIR_VECTS (res
).create (0);
2498 DDR_DIST_VECTS (res
).create (0);
2500 if (a
== NULL
|| b
== NULL
)
2502 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2506 /* If the data references do not alias, then they are independent. */
2507 if (!dr_may_alias_p (a
, b
, loop_nest
.exists () ? loop_nest
[0] : NULL
))
2509 DDR_ARE_DEPENDENT (res
) = chrec_known
;
2513 unsigned int num_dimensions_a
= DR_NUM_DIMENSIONS (a
);
2514 unsigned int num_dimensions_b
= DR_NUM_DIMENSIONS (b
);
2515 if (num_dimensions_a
== 0 || num_dimensions_b
== 0)
2517 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2521 /* For unconstrained bases, the root (highest-indexed) subscript
2522 describes a variation in the base of the original DR_REF rather
2523 than a component access. We have no type that accurately describes
2524 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2525 applying this subscript) so limit the search to the last real
2531 f (int a[][8], int b[][8])
2533 for (int i = 0; i < 8; ++i)
2534 a[i * 2][0] = b[i][0];
2537 the a and b accesses have a single ARRAY_REF component reference [0]
2538 but have two subscripts. */
2539 if (DR_UNCONSTRAINED_BASE (a
))
2540 num_dimensions_a
-= 1;
2541 if (DR_UNCONSTRAINED_BASE (b
))
2542 num_dimensions_b
-= 1;
2544 /* These structures describe sequences of component references in
2545 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2546 specific access function. */
2548 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2549 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2550 indices. In C notation, these are the indices of the rightmost
2551 component references; e.g. for a sequence .b.c.d, the start
2553 unsigned int start_a
;
2554 unsigned int start_b
;
2556 /* The sequence contains LENGTH consecutive access functions from
2558 unsigned int length
;
2560 /* The enclosing objects for the A and B sequences respectively,
2561 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2562 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2565 } full_seq
= {}, struct_seq
= {};
2567 /* Before each iteration of the loop:
2569 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2570 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2571 unsigned int index_a
= 0;
2572 unsigned int index_b
= 0;
2573 tree ref_a
= DR_REF (a
);
2574 tree ref_b
= DR_REF (b
);
2576 /* Now walk the component references from the final DR_REFs back up to
2577 the enclosing base objects. Each component reference corresponds
2578 to one access function in the DR, with access function 0 being for
2579 the final DR_REF and the highest-indexed access function being the
2580 one that is applied to the base of the DR.
2582 Look for a sequence of component references whose access functions
2583 are comparable (see access_fn_components_comparable_p). If more
2584 than one such sequence exists, pick the one nearest the base
2585 (which is the leftmost sequence in C notation). Store this sequence
2588 For example, if we have:
2590 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2593 B: __real b[0][i].s.e[i].f
2595 (where d is the same type as the real component of f) then the access
2602 B: __real .f [i] .e .s [i]
2604 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2605 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2606 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2607 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2608 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2609 index foo[10] arrays, so is again comparable. The sequence is
2612 A: [1, 3] (i.e. [i].s.c)
2613 B: [3, 5] (i.e. [i].s.e)
2615 Also look for sequences of component references whose access
2616 functions are comparable and whose enclosing objects have the same
2617 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2618 example, STRUCT_SEQ would be:
2620 A: [1, 2] (i.e. s.c)
2621 B: [3, 4] (i.e. s.e) */
2622 while (index_a
< num_dimensions_a
&& index_b
< num_dimensions_b
)
2624 /* REF_A and REF_B must be one of the component access types
2625 allowed by dr_analyze_indices. */
2626 gcc_checking_assert (access_fn_component_p (ref_a
));
2627 gcc_checking_assert (access_fn_component_p (ref_b
));
2629 /* Get the immediately-enclosing objects for REF_A and REF_B,
2630 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2631 and DR_ACCESS_FN (B, INDEX_B). */
2632 tree object_a
= TREE_OPERAND (ref_a
, 0);
2633 tree object_b
= TREE_OPERAND (ref_b
, 0);
2635 tree type_a
= TREE_TYPE (object_a
);
2636 tree type_b
= TREE_TYPE (object_b
);
2637 if (access_fn_components_comparable_p (ref_a
, ref_b
))
2639 /* This pair of component accesses is comparable for dependence
2640 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2641 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2642 if (full_seq
.start_a
+ full_seq
.length
!= index_a
2643 || full_seq
.start_b
+ full_seq
.length
!= index_b
)
2645 /* The accesses don't extend the current sequence,
2646 so start a new one here. */
2647 full_seq
.start_a
= index_a
;
2648 full_seq
.start_b
= index_b
;
2649 full_seq
.length
= 0;
2652 /* Add this pair of references to the sequence. */
2653 full_seq
.length
+= 1;
2654 full_seq
.object_a
= object_a
;
2655 full_seq
.object_b
= object_b
;
2657 /* If the enclosing objects are structures (and thus have the
2658 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2659 if (TREE_CODE (type_a
) == RECORD_TYPE
)
2660 struct_seq
= full_seq
;
2662 /* Move to the next containing reference for both A and B. */
2670 /* Try to approach equal type sizes. */
2671 if (!COMPLETE_TYPE_P (type_a
)
2672 || !COMPLETE_TYPE_P (type_b
)
2673 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a
))
2674 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b
)))
2677 unsigned HOST_WIDE_INT size_a
= tree_to_uhwi (TYPE_SIZE_UNIT (type_a
));
2678 unsigned HOST_WIDE_INT size_b
= tree_to_uhwi (TYPE_SIZE_UNIT (type_b
));
2679 if (size_a
<= size_b
)
2684 if (size_b
<= size_a
)
2691 /* See whether FULL_SEQ ends at the base and whether the two bases
2692 are equal. We do not care about TBAA or alignment info so we can
2693 use OEP_ADDRESS_OF to avoid false negatives. */
2694 tree base_a
= DR_BASE_OBJECT (a
);
2695 tree base_b
= DR_BASE_OBJECT (b
);
2696 bool same_base_p
= (full_seq
.start_a
+ full_seq
.length
== num_dimensions_a
2697 && full_seq
.start_b
+ full_seq
.length
== num_dimensions_b
2698 && DR_UNCONSTRAINED_BASE (a
) == DR_UNCONSTRAINED_BASE (b
)
2699 && operand_equal_p (base_a
, base_b
, OEP_ADDRESS_OF
)
2700 && types_compatible_p (TREE_TYPE (base_a
),
2702 && (!loop_nest
.exists ()
2703 || (object_address_invariant_in_loop_p
2704 (loop_nest
[0], base_a
))));
2706 /* If the bases are the same, we can include the base variation too.
2707 E.g. the b accesses in:
2709 for (int i = 0; i < n; ++i)
2710 b[i + 4][0] = b[i][0];
2712 have a definite dependence distance of 4, while for:
2714 for (int i = 0; i < n; ++i)
2715 a[i + 4][0] = b[i][0];
2717 the dependence distance depends on the gap between a and b.
2719 If the bases are different then we can only rely on the sequence
2720 rooted at a structure access, since arrays are allowed to overlap
2721 arbitrarily and change shape arbitrarily. E.g. we treat this as
2726 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2728 where two lvalues with the same int[4][3] type overlap, and where
2729 both lvalues are distinct from the object's declared type. */
2732 if (DR_UNCONSTRAINED_BASE (a
))
2733 full_seq
.length
+= 1;
2736 full_seq
= struct_seq
;
2738 /* Punt if we didn't find a suitable sequence. */
2739 if (full_seq
.length
== 0)
2741 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2747 /* Partial overlap is possible for different bases when strict aliasing
2748 is not in effect. It's also possible if either base involves a union
2751 struct s1 { int a[2]; };
2752 struct s2 { struct s1 b; int c; };
2753 struct s3 { int d; struct s1 e; };
2754 union u { struct s2 f; struct s3 g; } *p, *q;
2756 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2757 "p->g.e" (base "p->g") and might partially overlap the s1 at
2758 "q->g.e" (base "q->g"). */
2759 if (!flag_strict_aliasing
2760 || ref_contains_union_access_p (full_seq
.object_a
)
2761 || ref_contains_union_access_p (full_seq
.object_b
))
2763 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2767 DDR_COULD_BE_INDEPENDENT_P (res
) = true;
2768 if (!loop_nest
.exists ()
2769 || (object_address_invariant_in_loop_p (loop_nest
[0],
2771 && object_address_invariant_in_loop_p (loop_nest
[0],
2772 full_seq
.object_b
)))
2774 DDR_OBJECT_A (res
) = full_seq
.object_a
;
2775 DDR_OBJECT_B (res
) = full_seq
.object_b
;
2779 DDR_AFFINE_P (res
) = true;
2780 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
2781 DDR_SUBSCRIPTS (res
).create (full_seq
.length
);
2782 DDR_LOOP_NEST (res
) = loop_nest
;
2783 DDR_SELF_REFERENCE (res
) = false;
2785 for (i
= 0; i
< full_seq
.length
; ++i
)
2787 struct subscript
*subscript
;
2789 subscript
= XNEW (struct subscript
);
2790 SUB_ACCESS_FN (subscript
, 0) = DR_ACCESS_FN (a
, full_seq
.start_a
+ i
);
2791 SUB_ACCESS_FN (subscript
, 1) = DR_ACCESS_FN (b
, full_seq
.start_b
+ i
);
2792 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
2793 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
2794 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
2795 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2796 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
2802 /* Frees memory used by the conflict function F. */
2805 free_conflict_function (conflict_function
*f
)
2809 if (CF_NONTRIVIAL_P (f
))
2811 for (i
= 0; i
< f
->n
; i
++)
2812 affine_fn_free (f
->fns
[i
]);
2817 /* Frees memory used by SUBSCRIPTS. */
2820 free_subscripts (vec
<subscript_p
> subscripts
)
2825 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
2827 free_conflict_function (s
->conflicting_iterations_in_a
);
2828 free_conflict_function (s
->conflicting_iterations_in_b
);
2831 subscripts
.release ();
2834 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2838 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
2841 DDR_ARE_DEPENDENT (ddr
) = chrec
;
2842 free_subscripts (DDR_SUBSCRIPTS (ddr
));
2843 DDR_SUBSCRIPTS (ddr
).create (0);
2846 /* The dependence relation DDR cannot be represented by a distance
2850 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
2852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2853 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
2855 DDR_AFFINE_P (ddr
) = false;
2860 /* This section contains the classic Banerjee tests. */
2862 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2863 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2866 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
2868 return (evolution_function_is_constant_p (chrec_a
)
2869 && evolution_function_is_constant_p (chrec_b
));
2872 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2873 variable, i.e., if the SIV (Single Index Variable) test is true. */
2876 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
2878 if ((evolution_function_is_constant_p (chrec_a
)
2879 && evolution_function_is_univariate_p (chrec_b
))
2880 || (evolution_function_is_constant_p (chrec_b
)
2881 && evolution_function_is_univariate_p (chrec_a
)))
2884 if (evolution_function_is_univariate_p (chrec_a
)
2885 && evolution_function_is_univariate_p (chrec_b
))
2887 switch (TREE_CODE (chrec_a
))
2889 case POLYNOMIAL_CHREC
:
2890 switch (TREE_CODE (chrec_b
))
2892 case POLYNOMIAL_CHREC
:
2893 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
2909 /* Creates a conflict function with N dimensions. The affine functions
2910 in each dimension follow. */
2912 static conflict_function
*
2913 conflict_fn (unsigned n
, ...)
2916 conflict_function
*ret
= XCNEW (conflict_function
);
2919 gcc_assert (n
> 0 && n
<= MAX_DIM
);
2923 for (i
= 0; i
< n
; i
++)
2924 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
2930 /* Returns constant affine function with value CST. */
2933 affine_fn_cst (tree cst
)
2937 fn
.quick_push (cst
);
2941 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2944 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
2947 fn
.create (dim
+ 1);
2950 gcc_assert (dim
> 0);
2951 fn
.quick_push (cst
);
2952 for (i
= 1; i
< dim
; i
++)
2953 fn
.quick_push (integer_zero_node
);
2954 fn
.quick_push (coef
);
2958 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2959 *OVERLAPS_B are initialized to the functions that describe the
2960 relation between the elements accessed twice by CHREC_A and
2961 CHREC_B. For k >= 0, the following property is verified:
2963 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2966 analyze_ziv_subscript (tree chrec_a
,
2968 conflict_function
**overlaps_a
,
2969 conflict_function
**overlaps_b
,
2970 tree
*last_conflicts
)
2972 tree type
, difference
;
2973 dependence_stats
.num_ziv
++;
2975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2976 fprintf (dump_file
, "(analyze_ziv_subscript \n");
2978 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2979 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2980 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2981 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2983 switch (TREE_CODE (difference
))
2986 if (integer_zerop (difference
))
2988 /* The difference is equal to zero: the accessed index
2989 overlaps for each iteration in the loop. */
2990 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2991 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2992 *last_conflicts
= chrec_dont_know
;
2993 dependence_stats
.num_ziv_dependent
++;
2997 /* The accesses do not overlap. */
2998 *overlaps_a
= conflict_fn_no_dependence ();
2999 *overlaps_b
= conflict_fn_no_dependence ();
3000 *last_conflicts
= integer_zero_node
;
3001 dependence_stats
.num_ziv_independent
++;
3006 /* We're not sure whether the indexes overlap. For the moment,
3007 conservatively answer "don't know". */
3008 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3009 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
3011 *overlaps_a
= conflict_fn_not_known ();
3012 *overlaps_b
= conflict_fn_not_known ();
3013 *last_conflicts
= chrec_dont_know
;
3014 dependence_stats
.num_ziv_unimplemented
++;
3018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3019 fprintf (dump_file
, ")\n");
3022 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3023 and only if it fits to the int type. If this is not the case, or the
3024 bound on the number of iterations of LOOP could not be derived, returns
3028 max_stmt_executions_tree (class loop
*loop
)
3032 if (!max_stmt_executions (loop
, &nit
))
3033 return chrec_dont_know
;
3035 if (!wi::fits_to_tree_p (nit
, unsigned_type_node
))
3036 return chrec_dont_know
;
3038 return wide_int_to_tree (unsigned_type_node
, nit
);
3041 /* Determine whether the CHREC is always positive/negative. If the expression
3042 cannot be statically analyzed, return false, otherwise set the answer into
3046 chrec_is_positive (tree chrec
, bool *value
)
3048 bool value0
, value1
, value2
;
3049 tree end_value
, nb_iter
;
3051 switch (TREE_CODE (chrec
))
3053 case POLYNOMIAL_CHREC
:
3054 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
3055 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
3058 /* FIXME -- overflows. */
3059 if (value0
== value1
)
3065 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3066 and the proof consists in showing that the sign never
3067 changes during the execution of the loop, from 0 to
3068 loop->nb_iterations. */
3069 if (!evolution_function_is_affine_p (chrec
))
3072 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
3073 if (chrec_contains_undetermined (nb_iter
))
3077 /* TODO -- If the test is after the exit, we may decrease the number of
3078 iterations by one. */
3080 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
3083 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
3085 if (!chrec_is_positive (end_value
, &value2
))
3089 return value0
== value1
;
3092 switch (tree_int_cst_sgn (chrec
))
3111 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3112 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3113 *OVERLAPS_B are initialized to the functions that describe the
3114 relation between the elements accessed twice by CHREC_A and
3115 CHREC_B. For k >= 0, the following property is verified:
3117 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3120 analyze_siv_subscript_cst_affine (tree chrec_a
,
3122 conflict_function
**overlaps_a
,
3123 conflict_function
**overlaps_b
,
3124 tree
*last_conflicts
)
3126 bool value0
, value1
, value2
;
3127 tree type
, difference
, tmp
;
3129 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
3130 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
3131 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
3132 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
3134 /* Special case overlap in the first iteration. */
3135 if (integer_zerop (difference
))
3137 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3138 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3139 *last_conflicts
= integer_one_node
;
3143 if (!chrec_is_positive (initial_condition (difference
), &value0
))
3145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3146 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
3148 dependence_stats
.num_siv_unimplemented
++;
3149 *overlaps_a
= conflict_fn_not_known ();
3150 *overlaps_b
= conflict_fn_not_known ();
3151 *last_conflicts
= chrec_dont_know
;
3156 if (value0
== false)
3158 if (TREE_CODE (chrec_b
) != POLYNOMIAL_CHREC
3159 || !chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
3161 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3162 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
3164 *overlaps_a
= conflict_fn_not_known ();
3165 *overlaps_b
= conflict_fn_not_known ();
3166 *last_conflicts
= chrec_dont_know
;
3167 dependence_stats
.num_siv_unimplemented
++;
3176 chrec_b = {10, +, 1}
3179 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
3181 HOST_WIDE_INT numiter
;
3182 class loop
*loop
= get_chrec_loop (chrec_b
);
3184 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3185 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
3186 fold_build1 (ABS_EXPR
, type
, difference
),
3187 CHREC_RIGHT (chrec_b
));
3188 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
3189 *last_conflicts
= integer_one_node
;
3192 /* Perform weak-zero siv test to see if overlap is
3193 outside the loop bounds. */
3194 numiter
= max_stmt_executions_int (loop
);
3197 && compare_tree_int (tmp
, numiter
) > 0)
3199 free_conflict_function (*overlaps_a
);
3200 free_conflict_function (*overlaps_b
);
3201 *overlaps_a
= conflict_fn_no_dependence ();
3202 *overlaps_b
= conflict_fn_no_dependence ();
3203 *last_conflicts
= integer_zero_node
;
3204 dependence_stats
.num_siv_independent
++;
3207 dependence_stats
.num_siv_dependent
++;
3211 /* When the step does not divide the difference, there are
3215 *overlaps_a
= conflict_fn_no_dependence ();
3216 *overlaps_b
= conflict_fn_no_dependence ();
3217 *last_conflicts
= integer_zero_node
;
3218 dependence_stats
.num_siv_independent
++;
3227 chrec_b = {10, +, -1}
3229 In this case, chrec_a will not overlap with chrec_b. */
3230 *overlaps_a
= conflict_fn_no_dependence ();
3231 *overlaps_b
= conflict_fn_no_dependence ();
3232 *last_conflicts
= integer_zero_node
;
3233 dependence_stats
.num_siv_independent
++;
3240 if (TREE_CODE (chrec_b
) != POLYNOMIAL_CHREC
3241 || !chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
3243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3244 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
3246 *overlaps_a
= conflict_fn_not_known ();
3247 *overlaps_b
= conflict_fn_not_known ();
3248 *last_conflicts
= chrec_dont_know
;
3249 dependence_stats
.num_siv_unimplemented
++;
3254 if (value2
== false)
3258 chrec_b = {10, +, -1}
3260 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
3262 HOST_WIDE_INT numiter
;
3263 class loop
*loop
= get_chrec_loop (chrec_b
);
3265 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3266 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
3267 CHREC_RIGHT (chrec_b
));
3268 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
3269 *last_conflicts
= integer_one_node
;
3271 /* Perform weak-zero siv test to see if overlap is
3272 outside the loop bounds. */
3273 numiter
= max_stmt_executions_int (loop
);
3276 && compare_tree_int (tmp
, numiter
) > 0)
3278 free_conflict_function (*overlaps_a
);
3279 free_conflict_function (*overlaps_b
);
3280 *overlaps_a
= conflict_fn_no_dependence ();
3281 *overlaps_b
= conflict_fn_no_dependence ();
3282 *last_conflicts
= integer_zero_node
;
3283 dependence_stats
.num_siv_independent
++;
3286 dependence_stats
.num_siv_dependent
++;
3290 /* When the step does not divide the difference, there
3294 *overlaps_a
= conflict_fn_no_dependence ();
3295 *overlaps_b
= conflict_fn_no_dependence ();
3296 *last_conflicts
= integer_zero_node
;
3297 dependence_stats
.num_siv_independent
++;
3307 In this case, chrec_a will not overlap with chrec_b. */
3308 *overlaps_a
= conflict_fn_no_dependence ();
3309 *overlaps_b
= conflict_fn_no_dependence ();
3310 *last_conflicts
= integer_zero_node
;
3311 dependence_stats
.num_siv_independent
++;
3319 /* Helper recursive function for initializing the matrix A. Returns
3320 the initial value of CHREC. */
3323 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
3327 switch (TREE_CODE (chrec
))
3329 case POLYNOMIAL_CHREC
:
3330 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec
)))
3331 return chrec_dont_know
;
3332 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
3333 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
3339 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3340 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
3342 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
3347 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3348 return chrec_convert (chrec_type (chrec
), op
, NULL
);
3353 /* Handle ~X as -1 - X. */
3354 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3355 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
3356 build_int_cst (TREE_TYPE (chrec
), -1), op
);
3368 #define FLOOR_DIV(x,y) ((x) / (y))
3370 /* Solves the special case of the Diophantine equation:
3371 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3373 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3374 number of iterations that loops X and Y run. The overlaps will be
3375 constructed as evolutions in dimension DIM. */
3378 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter
,
3379 HOST_WIDE_INT step_a
,
3380 HOST_WIDE_INT step_b
,
3381 affine_fn
*overlaps_a
,
3382 affine_fn
*overlaps_b
,
3383 tree
*last_conflicts
, int dim
)
3385 if (((step_a
> 0 && step_b
> 0)
3386 || (step_a
< 0 && step_b
< 0)))
3388 HOST_WIDE_INT step_overlaps_a
, step_overlaps_b
;
3389 HOST_WIDE_INT gcd_steps_a_b
, last_conflict
, tau2
;
3391 gcd_steps_a_b
= gcd (step_a
, step_b
);
3392 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
3393 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
3397 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
3398 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
3399 last_conflict
= tau2
;
3400 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
3403 *last_conflicts
= chrec_dont_know
;
3405 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
3406 build_int_cst (NULL_TREE
,
3408 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
3409 build_int_cst (NULL_TREE
,
3415 *overlaps_a
= affine_fn_cst (integer_zero_node
);
3416 *overlaps_b
= affine_fn_cst (integer_zero_node
);
3417 *last_conflicts
= integer_zero_node
;
3421 /* Solves the special case of a Diophantine equation where CHREC_A is
3422 an affine bivariate function, and CHREC_B is an affine univariate
3423 function. For example,
3425 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3427 has the following overlapping functions:
3429 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3430 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3431 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3433 FORNOW: This is a specialized implementation for a case occurring in
3434 a common benchmark. Implement the general algorithm. */
3437 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
3438 conflict_function
**overlaps_a
,
3439 conflict_function
**overlaps_b
,
3440 tree
*last_conflicts
)
3442 bool xz_p
, yz_p
, xyz_p
;
3443 HOST_WIDE_INT step_x
, step_y
, step_z
;
3444 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
3445 affine_fn overlaps_a_xz
, overlaps_b_xz
;
3446 affine_fn overlaps_a_yz
, overlaps_b_yz
;
3447 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
3448 affine_fn ova1
, ova2
, ovb
;
3449 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
3451 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
3452 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
3453 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
3455 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
3456 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
3457 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
3459 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
3461 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3462 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
3464 *overlaps_a
= conflict_fn_not_known ();
3465 *overlaps_b
= conflict_fn_not_known ();
3466 *last_conflicts
= chrec_dont_know
;
3470 niter
= MIN (niter_x
, niter_z
);
3471 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
3474 &last_conflicts_xz
, 1);
3475 niter
= MIN (niter_y
, niter_z
);
3476 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
3479 &last_conflicts_yz
, 2);
3480 niter
= MIN (niter_x
, niter_z
);
3481 niter
= MIN (niter_y
, niter
);
3482 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
3485 &last_conflicts_xyz
, 3);
3487 xz_p
= !integer_zerop (last_conflicts_xz
);
3488 yz_p
= !integer_zerop (last_conflicts_yz
);
3489 xyz_p
= !integer_zerop (last_conflicts_xyz
);
3491 if (xz_p
|| yz_p
|| xyz_p
)
3493 ova1
= affine_fn_cst (integer_zero_node
);
3494 ova2
= affine_fn_cst (integer_zero_node
);
3495 ovb
= affine_fn_cst (integer_zero_node
);
3498 affine_fn t0
= ova1
;
3501 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
3502 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
3503 affine_fn_free (t0
);
3504 affine_fn_free (t2
);
3505 *last_conflicts
= last_conflicts_xz
;
3509 affine_fn t0
= ova2
;
3512 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
3513 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
3514 affine_fn_free (t0
);
3515 affine_fn_free (t2
);
3516 *last_conflicts
= last_conflicts_yz
;
3520 affine_fn t0
= ova1
;
3521 affine_fn t2
= ova2
;
3524 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
3525 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
3526 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
3527 affine_fn_free (t0
);
3528 affine_fn_free (t2
);
3529 affine_fn_free (t4
);
3530 *last_conflicts
= last_conflicts_xyz
;
3532 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
3533 *overlaps_b
= conflict_fn (1, ovb
);
3537 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3538 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3539 *last_conflicts
= integer_zero_node
;
3542 affine_fn_free (overlaps_a_xz
);
3543 affine_fn_free (overlaps_b_xz
);
3544 affine_fn_free (overlaps_a_yz
);
3545 affine_fn_free (overlaps_b_yz
);
3546 affine_fn_free (overlaps_a_xyz
);
3547 affine_fn_free (overlaps_b_xyz
);
3550 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3553 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
3556 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
3559 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3562 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
3567 for (i
= 0; i
< m
; i
++)
3568 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
3571 /* Store the N x N identity matrix in MAT. */
3574 lambda_matrix_id (lambda_matrix mat
, int size
)
3578 for (i
= 0; i
< size
; i
++)
3579 for (j
= 0; j
< size
; j
++)
3580 mat
[i
][j
] = (i
== j
) ? 1 : 0;
3583 /* Return the index of the first nonzero element of vector VEC1 between
3584 START and N. We must have START <= N.
3585 Returns N if VEC1 is the zero vector. */
3588 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
3591 while (j
< n
&& vec1
[j
] == 0)
3596 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3597 R2 = R2 + CONST1 * R1. */
3600 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
,
3608 for (i
= 0; i
< n
; i
++)
3609 mat
[r2
][i
] += const1
* mat
[r1
][i
];
3612 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3613 and store the result in VEC2. */
3616 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
3617 int size
, lambda_int const1
)
3622 lambda_vector_clear (vec2
, size
);
3624 for (i
= 0; i
< size
; i
++)
3625 vec2
[i
] = const1
* vec1
[i
];
3628 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3631 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
3634 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
3637 /* Negate row R1 of matrix MAT which has N columns. */
3640 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
3642 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
3645 /* Return true if two vectors are equal. */
3648 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
3651 for (i
= 0; i
< size
; i
++)
3652 if (vec1
[i
] != vec2
[i
])
3657 /* Given an M x N integer matrix A, this function determines an M x
3658 M unimodular matrix U, and an M x N echelon matrix S such that
3659 "U.A = S". This decomposition is also known as "right Hermite".
3661 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3662 Restructuring Compilers" Utpal Banerjee. */
3665 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
3666 lambda_matrix S
, lambda_matrix U
)
3670 lambda_matrix_copy (A
, S
, m
, n
);
3671 lambda_matrix_id (U
, m
);
3673 for (j
= 0; j
< n
; j
++)
3675 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
3678 for (i
= m
- 1; i
>= i0
; i
--)
3680 while (S
[i
][j
] != 0)
3682 lambda_int sigma
, factor
, a
, b
;
3686 sigma
= (a
* b
< 0) ? -1: 1;
3689 factor
= sigma
* (a
/ b
);
3691 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
3692 std::swap (S
[i
], S
[i
-1]);
3694 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
3695 std::swap (U
[i
], U
[i
-1]);
3702 /* Determines the overlapping elements due to accesses CHREC_A and
3703 CHREC_B, that are affine functions. This function cannot handle
3704 symbolic evolution functions, ie. when initial conditions are
3705 parameters, because it uses lambda matrices of integers. */
3708 analyze_subscript_affine_affine (tree chrec_a
,
3710 conflict_function
**overlaps_a
,
3711 conflict_function
**overlaps_b
,
3712 tree
*last_conflicts
)
3714 unsigned nb_vars_a
, nb_vars_b
, dim
;
3715 HOST_WIDE_INT gamma
, gcd_alpha_beta
;
3716 lambda_matrix A
, U
, S
;
3717 struct obstack scratch_obstack
;
3719 if (eq_evolutions_p (chrec_a
, chrec_b
))
3721 /* The accessed index overlaps for each iteration in the
3723 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3724 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3725 *last_conflicts
= chrec_dont_know
;
3728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3729 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
3731 /* For determining the initial intersection, we have to solve a
3732 Diophantine equation. This is the most time consuming part.
3734 For answering to the question: "Is there a dependence?" we have
3735 to prove that there exists a solution to the Diophantine
3736 equation, and that the solution is in the iteration domain,
3737 i.e. the solution is positive or zero, and that the solution
3738 happens before the upper bound loop.nb_iterations. Otherwise
3739 there is no dependence. This function outputs a description of
3740 the iterations that hold the intersections. */
3742 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
3743 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
3745 gcc_obstack_init (&scratch_obstack
);
3747 dim
= nb_vars_a
+ nb_vars_b
;
3748 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
3749 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3750 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3752 tree init_a
= initialize_matrix_A (A
, chrec_a
, 0, 1);
3753 tree init_b
= initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1);
3754 if (init_a
== chrec_dont_know
3755 || init_b
== chrec_dont_know
)
3757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3758 fprintf (dump_file
, "affine-affine test failed: "
3759 "representation issue.\n");
3760 *overlaps_a
= conflict_fn_not_known ();
3761 *overlaps_b
= conflict_fn_not_known ();
3762 *last_conflicts
= chrec_dont_know
;
3763 goto end_analyze_subs_aa
;
3765 gamma
= int_cst_value (init_b
) - int_cst_value (init_a
);
3767 /* Don't do all the hard work of solving the Diophantine equation
3768 when we already know the solution: for example,
3771 | gamma = 3 - 3 = 0.
3772 Then the first overlap occurs during the first iterations:
3773 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3777 if (nb_vars_a
== 1 && nb_vars_b
== 1)
3779 HOST_WIDE_INT step_a
, step_b
;
3780 HOST_WIDE_INT niter
, niter_a
, niter_b
;
3783 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
3784 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
3785 niter
= MIN (niter_a
, niter_b
);
3786 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
3787 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
3789 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
3792 *overlaps_a
= conflict_fn (1, ova
);
3793 *overlaps_b
= conflict_fn (1, ovb
);
3796 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
3797 compute_overlap_steps_for_affine_1_2
3798 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
3800 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
3801 compute_overlap_steps_for_affine_1_2
3802 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
3806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3807 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
3808 *overlaps_a
= conflict_fn_not_known ();
3809 *overlaps_b
= conflict_fn_not_known ();
3810 *last_conflicts
= chrec_dont_know
;
3812 goto end_analyze_subs_aa
;
3816 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
3821 lambda_matrix_row_negate (U
, dim
, 0);
3823 gcd_alpha_beta
= S
[0][0];
3825 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3826 but that is a quite strange case. Instead of ICEing, answer
3828 if (gcd_alpha_beta
== 0)
3830 *overlaps_a
= conflict_fn_not_known ();
3831 *overlaps_b
= conflict_fn_not_known ();
3832 *last_conflicts
= chrec_dont_know
;
3833 goto end_analyze_subs_aa
;
3836 /* The classic "gcd-test". */
3837 if (!int_divides_p (gcd_alpha_beta
, gamma
))
3839 /* The "gcd-test" has determined that there is no integer
3840 solution, i.e. there is no dependence. */
3841 *overlaps_a
= conflict_fn_no_dependence ();
3842 *overlaps_b
= conflict_fn_no_dependence ();
3843 *last_conflicts
= integer_zero_node
;
3846 /* Both access functions are univariate. This includes SIV and MIV cases. */
3847 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
3849 /* Both functions should have the same evolution sign. */
3850 if (((A
[0][0] > 0 && -A
[1][0] > 0)
3851 || (A
[0][0] < 0 && -A
[1][0] < 0)))
3853 /* The solutions are given by:
3855 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3858 For a given integer t. Using the following variables,
3860 | i0 = u11 * gamma / gcd_alpha_beta
3861 | j0 = u12 * gamma / gcd_alpha_beta
3868 | y0 = j0 + j1 * t. */
3869 HOST_WIDE_INT i0
, j0
, i1
, j1
;
3871 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
3872 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
3876 if ((i1
== 0 && i0
< 0)
3877 || (j1
== 0 && j0
< 0))
3879 /* There is no solution.
3880 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3881 falls in here, but for the moment we don't look at the
3882 upper bound of the iteration domain. */
3883 *overlaps_a
= conflict_fn_no_dependence ();
3884 *overlaps_b
= conflict_fn_no_dependence ();
3885 *last_conflicts
= integer_zero_node
;
3886 goto end_analyze_subs_aa
;
3889 if (i1
> 0 && j1
> 0)
3891 HOST_WIDE_INT niter_a
3892 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
3893 HOST_WIDE_INT niter_b
3894 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
3895 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
3897 /* (X0, Y0) is a solution of the Diophantine equation:
3898 "chrec_a (X0) = chrec_b (Y0)". */
3899 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
3901 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
3902 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
3904 /* (X1, Y1) is the smallest positive solution of the eq
3905 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3906 first conflict occurs. */
3907 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
3908 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
3909 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
3913 /* If the overlap occurs outside of the bounds of the
3914 loop, there is no dependence. */
3915 if (x1
>= niter_a
|| y1
>= niter_b
)
3917 *overlaps_a
= conflict_fn_no_dependence ();
3918 *overlaps_b
= conflict_fn_no_dependence ();
3919 *last_conflicts
= integer_zero_node
;
3920 goto end_analyze_subs_aa
;
3923 /* max stmt executions can get quite large, avoid
3924 overflows by using wide ints here. */
3926 = wi::smin (wi::sdiv_floor (wi::sub (niter_a
, i0
), i1
),
3927 wi::sdiv_floor (wi::sub (niter_b
, j0
), j1
));
3928 widest_int last_conflict
= wi::sub (tau2
, (x1
- i0
)/i1
);
3929 if (wi::min_precision (last_conflict
, SIGNED
)
3930 <= TYPE_PRECISION (integer_type_node
))
3932 = build_int_cst (integer_type_node
,
3933 last_conflict
.to_shwi ());
3935 *last_conflicts
= chrec_dont_know
;
3938 *last_conflicts
= chrec_dont_know
;
3942 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
3944 build_int_cst (NULL_TREE
, i1
)));
3947 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
3949 build_int_cst (NULL_TREE
, j1
)));
3953 /* FIXME: For the moment, the upper bound of the
3954 iteration domain for i and j is not checked. */
3955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3956 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3957 *overlaps_a
= conflict_fn_not_known ();
3958 *overlaps_b
= conflict_fn_not_known ();
3959 *last_conflicts
= chrec_dont_know
;
3964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3965 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3966 *overlaps_a
= conflict_fn_not_known ();
3967 *overlaps_b
= conflict_fn_not_known ();
3968 *last_conflicts
= chrec_dont_know
;
3973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3974 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3975 *overlaps_a
= conflict_fn_not_known ();
3976 *overlaps_b
= conflict_fn_not_known ();
3977 *last_conflicts
= chrec_dont_know
;
3980 end_analyze_subs_aa
:
3981 obstack_free (&scratch_obstack
, NULL
);
3982 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3984 fprintf (dump_file
, " (overlaps_a = ");
3985 dump_conflict_function (dump_file
, *overlaps_a
);
3986 fprintf (dump_file
, ")\n (overlaps_b = ");
3987 dump_conflict_function (dump_file
, *overlaps_b
);
3988 fprintf (dump_file
, "))\n");
3992 /* Returns true when analyze_subscript_affine_affine can be used for
3993 determining the dependence relation between chrec_a and chrec_b,
3994 that contain symbols. This function modifies chrec_a and chrec_b
3995 such that the analysis result is the same, and such that they don't
3996 contain symbols, and then can safely be passed to the analyzer.
3998 Example: The analysis of the following tuples of evolutions produce
3999 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4002 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4003 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4007 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
4009 tree diff
, type
, left_a
, left_b
, right_b
;
4011 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
4012 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
4013 /* FIXME: For the moment not handled. Might be refined later. */
4016 type
= chrec_type (*chrec_a
);
4017 left_a
= CHREC_LEFT (*chrec_a
);
4018 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
4019 diff
= chrec_fold_minus (type
, left_a
, left_b
);
4021 if (!evolution_function_is_constant_p (diff
))
4024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4025 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
4027 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
4028 diff
, CHREC_RIGHT (*chrec_a
));
4029 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
4030 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
4031 build_int_cst (type
, 0),
4036 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4037 *OVERLAPS_B are initialized to the functions that describe the
4038 relation between the elements accessed twice by CHREC_A and
4039 CHREC_B. For k >= 0, the following property is verified:
4041 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4044 analyze_siv_subscript (tree chrec_a
,
4046 conflict_function
**overlaps_a
,
4047 conflict_function
**overlaps_b
,
4048 tree
*last_conflicts
,
4051 dependence_stats
.num_siv
++;
4053 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4054 fprintf (dump_file
, "(analyze_siv_subscript \n");
4056 if (evolution_function_is_constant_p (chrec_a
)
4057 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
4058 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
4059 overlaps_a
, overlaps_b
, last_conflicts
);
4061 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
4062 && evolution_function_is_constant_p (chrec_b
))
4063 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
4064 overlaps_b
, overlaps_a
, last_conflicts
);
4066 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
4067 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
4069 if (!chrec_contains_symbols (chrec_a
)
4070 && !chrec_contains_symbols (chrec_b
))
4072 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4073 overlaps_a
, overlaps_b
,
4076 if (CF_NOT_KNOWN_P (*overlaps_a
)
4077 || CF_NOT_KNOWN_P (*overlaps_b
))
4078 dependence_stats
.num_siv_unimplemented
++;
4079 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4080 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4081 dependence_stats
.num_siv_independent
++;
4083 dependence_stats
.num_siv_dependent
++;
4085 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
4088 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4089 overlaps_a
, overlaps_b
,
4092 if (CF_NOT_KNOWN_P (*overlaps_a
)
4093 || CF_NOT_KNOWN_P (*overlaps_b
))
4094 dependence_stats
.num_siv_unimplemented
++;
4095 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4096 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4097 dependence_stats
.num_siv_independent
++;
4099 dependence_stats
.num_siv_dependent
++;
4102 goto siv_subscript_dontknow
;
4107 siv_subscript_dontknow
:;
4108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4109 fprintf (dump_file
, " siv test failed: unimplemented");
4110 *overlaps_a
= conflict_fn_not_known ();
4111 *overlaps_b
= conflict_fn_not_known ();
4112 *last_conflicts
= chrec_dont_know
;
4113 dependence_stats
.num_siv_unimplemented
++;
4116 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4117 fprintf (dump_file
, ")\n");
4120 /* Returns false if we can prove that the greatest common divisor of the steps
4121 of CHREC does not divide CST, false otherwise. */
4124 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
4126 HOST_WIDE_INT cd
= 0, val
;
4129 if (!tree_fits_shwi_p (cst
))
4131 val
= tree_to_shwi (cst
);
4133 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
4135 step
= CHREC_RIGHT (chrec
);
4136 if (!tree_fits_shwi_p (step
))
4138 cd
= gcd (cd
, tree_to_shwi (step
));
4139 chrec
= CHREC_LEFT (chrec
);
4142 return val
% cd
== 0;
4145 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4146 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4147 functions that describe the relation between the elements accessed
4148 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4151 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4154 analyze_miv_subscript (tree chrec_a
,
4156 conflict_function
**overlaps_a
,
4157 conflict_function
**overlaps_b
,
4158 tree
*last_conflicts
,
4159 class loop
*loop_nest
)
4161 tree type
, difference
;
4163 dependence_stats
.num_miv
++;
4164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4165 fprintf (dump_file
, "(analyze_miv_subscript \n");
4167 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
4168 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
4169 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
4170 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
4172 if (eq_evolutions_p (chrec_a
, chrec_b
))
4174 /* Access functions are the same: all the elements are accessed
4175 in the same order. */
4176 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4177 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4178 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
4179 dependence_stats
.num_miv_dependent
++;
4182 else if (evolution_function_is_constant_p (difference
)
4183 && evolution_function_is_affine_multivariate_p (chrec_a
,
4185 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
4187 /* testsuite/.../ssa-chrec-33.c
4188 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4190 The difference is 1, and all the evolution steps are multiples
4191 of 2, consequently there are no overlapping elements. */
4192 *overlaps_a
= conflict_fn_no_dependence ();
4193 *overlaps_b
= conflict_fn_no_dependence ();
4194 *last_conflicts
= integer_zero_node
;
4195 dependence_stats
.num_miv_independent
++;
4198 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest
->num
)
4199 && !chrec_contains_symbols (chrec_a
, loop_nest
)
4200 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest
->num
)
4201 && !chrec_contains_symbols (chrec_b
, loop_nest
))
4203 /* testsuite/.../ssa-chrec-35.c
4204 {0, +, 1}_2 vs. {0, +, 1}_3
4205 the overlapping elements are respectively located at iterations:
4206 {0, +, 1}_x and {0, +, 1}_x,
4207 in other words, we have the equality:
4208 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4211 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4212 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4214 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4215 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4217 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4218 overlaps_a
, overlaps_b
, last_conflicts
);
4220 if (CF_NOT_KNOWN_P (*overlaps_a
)
4221 || CF_NOT_KNOWN_P (*overlaps_b
))
4222 dependence_stats
.num_miv_unimplemented
++;
4223 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4224 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4225 dependence_stats
.num_miv_independent
++;
4227 dependence_stats
.num_miv_dependent
++;
4232 /* When the analysis is too difficult, answer "don't know". */
4233 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4234 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
4236 *overlaps_a
= conflict_fn_not_known ();
4237 *overlaps_b
= conflict_fn_not_known ();
4238 *last_conflicts
= chrec_dont_know
;
4239 dependence_stats
.num_miv_unimplemented
++;
4242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4243 fprintf (dump_file
, ")\n");
4246 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4247 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4248 OVERLAP_ITERATIONS_B are initialized with two functions that
4249 describe the iterations that contain conflicting elements.
4251 Remark: For an integer k >= 0, the following equality is true:
4253 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4257 analyze_overlapping_iterations (tree chrec_a
,
4259 conflict_function
**overlap_iterations_a
,
4260 conflict_function
**overlap_iterations_b
,
4261 tree
*last_conflicts
, class loop
*loop_nest
)
4263 unsigned int lnn
= loop_nest
->num
;
4265 dependence_stats
.num_subscript_tests
++;
4267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4269 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
4270 fprintf (dump_file
, " (chrec_a = ");
4271 print_generic_expr (dump_file
, chrec_a
);
4272 fprintf (dump_file
, ")\n (chrec_b = ");
4273 print_generic_expr (dump_file
, chrec_b
);
4274 fprintf (dump_file
, ")\n");
4277 if (chrec_a
== NULL_TREE
4278 || chrec_b
== NULL_TREE
4279 || chrec_contains_undetermined (chrec_a
)
4280 || chrec_contains_undetermined (chrec_b
))
4282 dependence_stats
.num_subscript_undetermined
++;
4284 *overlap_iterations_a
= conflict_fn_not_known ();
4285 *overlap_iterations_b
= conflict_fn_not_known ();
4288 /* If they are the same chrec, and are affine, they overlap
4289 on every iteration. */
4290 else if (eq_evolutions_p (chrec_a
, chrec_b
)
4291 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
4292 || operand_equal_p (chrec_a
, chrec_b
, 0)))
4294 dependence_stats
.num_same_subscript_function
++;
4295 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4296 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4297 *last_conflicts
= chrec_dont_know
;
4300 /* If they aren't the same, and aren't affine, we can't do anything
4302 else if ((chrec_contains_symbols (chrec_a
)
4303 || chrec_contains_symbols (chrec_b
))
4304 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
4305 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
4307 dependence_stats
.num_subscript_undetermined
++;
4308 *overlap_iterations_a
= conflict_fn_not_known ();
4309 *overlap_iterations_b
= conflict_fn_not_known ();
4312 else if (ziv_subscript_p (chrec_a
, chrec_b
))
4313 analyze_ziv_subscript (chrec_a
, chrec_b
,
4314 overlap_iterations_a
, overlap_iterations_b
,
4317 else if (siv_subscript_p (chrec_a
, chrec_b
))
4318 analyze_siv_subscript (chrec_a
, chrec_b
,
4319 overlap_iterations_a
, overlap_iterations_b
,
4320 last_conflicts
, lnn
);
4323 analyze_miv_subscript (chrec_a
, chrec_b
,
4324 overlap_iterations_a
, overlap_iterations_b
,
4325 last_conflicts
, loop_nest
);
4327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4329 fprintf (dump_file
, " (overlap_iterations_a = ");
4330 dump_conflict_function (dump_file
, *overlap_iterations_a
);
4331 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
4332 dump_conflict_function (dump_file
, *overlap_iterations_b
);
4333 fprintf (dump_file
, "))\n");
4337 /* Helper function for uniquely inserting distance vectors. */
4340 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
4345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
4346 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
4349 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
4352 /* Helper function for uniquely inserting direction vectors. */
4355 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
4360 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
4361 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
4364 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
4367 /* Add a distance of 1 on all the loops outer than INDEX. If we
4368 haven't yet determined a distance for this outer loop, push a new
4369 distance vector composed of the previous distance, and a distance
4370 of 1 for this outer loop. Example:
4378 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4379 save (0, 1), then we have to save (1, 0). */
4382 add_outer_distances (struct data_dependence_relation
*ddr
,
4383 lambda_vector dist_v
, int index
)
4385 /* For each outer loop where init_v is not set, the accesses are
4386 in dependence of distance 1 in the loop. */
4387 while (--index
>= 0)
4389 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4390 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
4392 save_dist_v (ddr
, save_v
);
4396 /* Return false when fail to represent the data dependence as a
4397 distance vector. A_INDEX is the index of the first reference
4398 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4399 second reference. INIT_B is set to true when a component has been
4400 added to the distance vector DIST_V. INDEX_CARRY is then set to
4401 the index in DIST_V that carries the dependence. */
4404 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
4405 unsigned int a_index
, unsigned int b_index
,
4406 lambda_vector dist_v
, bool *init_b
,
4410 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4411 class loop
*loop
= DDR_LOOP_NEST (ddr
)[0];
4413 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
4415 tree access_fn_a
, access_fn_b
;
4416 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
4418 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
4420 non_affine_dependence_relation (ddr
);
4424 access_fn_a
= SUB_ACCESS_FN (subscript
, a_index
);
4425 access_fn_b
= SUB_ACCESS_FN (subscript
, b_index
);
4427 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
4428 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
4432 int var_a
= CHREC_VARIABLE (access_fn_a
);
4433 int var_b
= CHREC_VARIABLE (access_fn_b
);
4436 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
4438 non_affine_dependence_relation (ddr
);
4442 /* When data references are collected in a loop while data
4443 dependences are analyzed in loop nest nested in the loop, we
4444 would have more number of access functions than number of
4445 loops. Skip access functions of loops not in the loop nest.
4447 See PR89725 for more information. */
4448 if (flow_loop_nested_p (get_loop (cfun
, var_a
), loop
))
4451 dist
= int_cst_value (SUB_DISTANCE (subscript
));
4452 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
4453 *index_carry
= MIN (index
, *index_carry
);
4455 /* This is the subscript coupling test. If we have already
4456 recorded a distance for this loop (a distance coming from
4457 another subscript), it should be the same. For example,
4458 in the following code, there is no dependence:
4465 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
4467 finalize_ddr_dependent (ddr
, chrec_known
);
4471 dist_v
[index
] = dist
;
4475 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
4477 /* This can be for example an affine vs. constant dependence
4478 (T[i] vs. T[3]) that is not an affine dependence and is
4479 not representable as a distance vector. */
4480 non_affine_dependence_relation (ddr
);
4488 /* Return true when the DDR contains only constant access functions. */
4491 constant_access_functions (const struct data_dependence_relation
*ddr
)
4496 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4497 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub
, 0))
4498 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub
, 1)))
4504 /* Helper function for the case where DDR_A and DDR_B are the same
4505 multivariate access function with a constant step. For an example
4509 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
4512 tree c_1
= CHREC_LEFT (c_2
);
4513 tree c_0
= CHREC_LEFT (c_1
);
4514 lambda_vector dist_v
;
4515 HOST_WIDE_INT v1
, v2
, cd
;
4517 /* Polynomials with more than 2 variables are not handled yet. When
4518 the evolution steps are parameters, it is not possible to
4519 represent the dependence using classical distance vectors. */
4520 if (TREE_CODE (c_0
) != INTEGER_CST
4521 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
4522 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
4524 DDR_AFFINE_P (ddr
) = false;
4528 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
4529 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
4531 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4532 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4533 v1
= int_cst_value (CHREC_RIGHT (c_1
));
4534 v2
= int_cst_value (CHREC_RIGHT (c_2
));
4547 save_dist_v (ddr
, dist_v
);
4549 add_outer_distances (ddr
, dist_v
, x_1
);
4552 /* Helper function for the case where DDR_A and DDR_B are the same
4553 access functions. */
4556 add_other_self_distances (struct data_dependence_relation
*ddr
)
4558 lambda_vector dist_v
;
4560 int index_carry
= DDR_NB_LOOPS (ddr
);
4562 class loop
*loop
= DDR_LOOP_NEST (ddr
)[0];
4564 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4566 tree access_fun
= SUB_ACCESS_FN (sub
, 0);
4568 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
4570 if (!evolution_function_is_univariate_p (access_fun
, loop
->num
))
4572 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
4574 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
4578 access_fun
= SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr
, 0), 0);
4580 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
4581 add_multivariate_self_dist (ddr
, access_fun
);
4583 /* The evolution step is not constant: it varies in
4584 the outer loop, so this cannot be represented by a
4585 distance vector. For example in pr34635.c the
4586 evolution is {0, +, {0, +, 4}_1}_2. */
4587 DDR_AFFINE_P (ddr
) = false;
4592 /* When data references are collected in a loop while data
4593 dependences are analyzed in loop nest nested in the loop, we
4594 would have more number of access functions than number of
4595 loops. Skip access functions of loops not in the loop nest.
4597 See PR89725 for more information. */
4598 if (flow_loop_nested_p (get_loop (cfun
, CHREC_VARIABLE (access_fun
)),
4602 index_carry
= MIN (index_carry
,
4603 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
4604 DDR_LOOP_NEST (ddr
)));
4608 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4609 add_outer_distances (ddr
, dist_v
, index_carry
);
4613 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
4615 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4618 save_dist_v (ddr
, dist_v
);
4621 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4622 is the case for example when access functions are the same and
4623 equal to a constant, as in:
4630 in which case the distance vectors are (0) and (1). */
4633 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
4637 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
4639 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
4640 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
4641 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
4643 for (j
= 0; j
< ca
->n
; j
++)
4644 if (affine_function_zero_p (ca
->fns
[j
]))
4646 insert_innermost_unit_dist_vector (ddr
);
4650 for (j
= 0; j
< cb
->n
; j
++)
4651 if (affine_function_zero_p (cb
->fns
[j
]))
4653 insert_innermost_unit_dist_vector (ddr
);
4659 /* Return true when the DDR contains two data references that have the
4660 same access functions. */
4663 same_access_functions (const struct data_dependence_relation
*ddr
)
4668 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4669 if (!eq_evolutions_p (SUB_ACCESS_FN (sub
, 0),
4670 SUB_ACCESS_FN (sub
, 1)))
4676 /* Compute the classic per loop distance vector. DDR is the data
4677 dependence relation to build a vector from. Return false when fail
4678 to represent the data dependence as a distance vector. */
4681 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
4682 class loop
*loop_nest
)
4684 bool init_b
= false;
4685 int index_carry
= DDR_NB_LOOPS (ddr
);
4686 lambda_vector dist_v
;
4688 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4691 if (same_access_functions (ddr
))
4693 /* Save the 0 vector. */
4694 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4695 save_dist_v (ddr
, dist_v
);
4697 if (constant_access_functions (ddr
))
4698 add_distance_for_zero_overlaps (ddr
);
4700 if (DDR_NB_LOOPS (ddr
) > 1)
4701 add_other_self_distances (ddr
);
4706 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4707 if (!build_classic_dist_vector_1 (ddr
, 0, 1, dist_v
, &init_b
, &index_carry
))
4710 /* Save the distance vector if we initialized one. */
4713 /* Verify a basic constraint: classic distance vectors should
4714 always be lexicographically positive.
4716 Data references are collected in the order of execution of
4717 the program, thus for the following loop
4719 | for (i = 1; i < 100; i++)
4720 | for (j = 1; j < 100; j++)
4722 | t = T[j+1][i-1]; // A
4723 | T[j][i] = t + 2; // B
4726 references are collected following the direction of the wind:
4727 A then B. The data dependence tests are performed also
4728 following this order, such that we're looking at the distance
4729 separating the elements accessed by A from the elements later
4730 accessed by B. But in this example, the distance returned by
4731 test_dep (A, B) is lexicographically negative (-1, 1), that
4732 means that the access A occurs later than B with respect to
4733 the outer loop, ie. we're actually looking upwind. In this
4734 case we solve test_dep (B, A) looking downwind to the
4735 lexicographically positive solution, that returns the
4736 distance vector (1, -1). */
4737 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
4739 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4740 if (!subscript_dependence_tester_1 (ddr
, 1, 0, loop_nest
))
4742 compute_subscript_distance (ddr
);
4743 if (!build_classic_dist_vector_1 (ddr
, 1, 0, save_v
, &init_b
,
4746 save_dist_v (ddr
, save_v
);
4747 DDR_REVERSED_P (ddr
) = true;
4749 /* In this case there is a dependence forward for all the
4752 | for (k = 1; k < 100; k++)
4753 | for (i = 1; i < 100; i++)
4754 | for (j = 1; j < 100; j++)
4756 | t = T[j+1][i-1]; // A
4757 | T[j][i] = t + 2; // B
4765 if (DDR_NB_LOOPS (ddr
) > 1)
4767 add_outer_distances (ddr
, save_v
, index_carry
);
4768 add_outer_distances (ddr
, dist_v
, index_carry
);
4773 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4774 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
4776 if (DDR_NB_LOOPS (ddr
) > 1)
4778 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4780 if (!subscript_dependence_tester_1 (ddr
, 1, 0, loop_nest
))
4782 compute_subscript_distance (ddr
);
4783 if (!build_classic_dist_vector_1 (ddr
, 1, 0, opposite_v
, &init_b
,
4787 save_dist_v (ddr
, save_v
);
4788 add_outer_distances (ddr
, dist_v
, index_carry
);
4789 add_outer_distances (ddr
, opposite_v
, index_carry
);
4792 save_dist_v (ddr
, save_v
);
4797 /* There is a distance of 1 on all the outer loops: Example:
4798 there is a dependence of distance 1 on loop_1 for the array A.
4804 add_outer_distances (ddr
, dist_v
,
4805 lambda_vector_first_nz (dist_v
,
4806 DDR_NB_LOOPS (ddr
), 0));
4809 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4813 fprintf (dump_file
, "(build_classic_dist_vector\n");
4814 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4816 fprintf (dump_file
, " dist_vector = (");
4817 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
4818 DDR_NB_LOOPS (ddr
));
4819 fprintf (dump_file
, " )\n");
4821 fprintf (dump_file
, ")\n");
4827 /* Return the direction for a given distance.
4828 FIXME: Computing dir this way is suboptimal, since dir can catch
4829 cases that dist is unable to represent. */
4831 static inline enum data_dependence_direction
4832 dir_from_dist (int dist
)
4835 return dir_positive
;
4837 return dir_negative
;
4842 /* Compute the classic per loop direction vector. DDR is the data
4843 dependence relation to build a vector from. */
4846 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
4849 lambda_vector dist_v
;
4851 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
4853 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4855 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4856 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
4858 save_dir_v (ddr
, dir_v
);
4862 /* Helper function. Returns true when there is a dependence between the
4863 data references. A_INDEX is the index of the first reference (0 for
4864 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4867 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
4868 unsigned int a_index
, unsigned int b_index
,
4869 class loop
*loop_nest
)
4872 tree last_conflicts
;
4873 struct subscript
*subscript
;
4874 tree res
= NULL_TREE
;
4876 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
4878 conflict_function
*overlaps_a
, *overlaps_b
;
4880 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript
, a_index
),
4881 SUB_ACCESS_FN (subscript
, b_index
),
4882 &overlaps_a
, &overlaps_b
,
4883 &last_conflicts
, loop_nest
);
4885 if (SUB_CONFLICTS_IN_A (subscript
))
4886 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4887 if (SUB_CONFLICTS_IN_B (subscript
))
4888 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4890 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
4891 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
4892 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
4894 /* If there is any undetermined conflict function we have to
4895 give a conservative answer in case we cannot prove that
4896 no dependence exists when analyzing another subscript. */
4897 if (CF_NOT_KNOWN_P (overlaps_a
)
4898 || CF_NOT_KNOWN_P (overlaps_b
))
4900 res
= chrec_dont_know
;
4904 /* When there is a subscript with no dependence we can stop. */
4905 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
4906 || CF_NO_DEPENDENCE_P (overlaps_b
))
4913 if (res
== NULL_TREE
)
4916 if (res
== chrec_known
)
4917 dependence_stats
.num_dependence_independent
++;
4919 dependence_stats
.num_dependence_undetermined
++;
4920 finalize_ddr_dependent (ddr
, res
);
4924 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4927 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
4928 class loop
*loop_nest
)
4930 if (subscript_dependence_tester_1 (ddr
, 0, 1, loop_nest
))
4931 dependence_stats
.num_dependence_dependent
++;
4933 compute_subscript_distance (ddr
);
4934 if (build_classic_dist_vector (ddr
, loop_nest
))
4935 build_classic_dir_vector (ddr
);
4938 /* Returns true when all the access functions of A are affine or
4939 constant with respect to LOOP_NEST. */
4942 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
4943 const class loop
*loop_nest
)
4946 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
4949 FOR_EACH_VEC_ELT (fns
, i
, t
)
4950 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
4951 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
4957 /* This computes the affine dependence relation between A and B with
4958 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4959 independence between two accesses, while CHREC_DONT_KNOW is used
4960 for representing the unknown relation.
4962 Note that it is possible to stop the computation of the dependence
4963 relation the first time we detect a CHREC_KNOWN element for a given
4967 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4968 class loop
*loop_nest
)
4970 struct data_reference
*dra
= DDR_A (ddr
);
4971 struct data_reference
*drb
= DDR_B (ddr
);
4973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4975 fprintf (dump_file
, "(compute_affine_dependence\n");
4976 fprintf (dump_file
, " stmt_a: ");
4977 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4978 fprintf (dump_file
, " stmt_b: ");
4979 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4982 /* Analyze only when the dependence relation is not yet known. */
4983 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4985 dependence_stats
.num_dependence_tests
++;
4987 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4988 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4989 subscript_dependence_tester (ddr
, loop_nest
);
4991 /* As a last case, if the dependence cannot be determined, or if
4992 the dependence is considered too difficult to determine, answer
4996 dependence_stats
.num_dependence_undetermined
++;
4998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5000 fprintf (dump_file
, "Data ref a:\n");
5001 dump_data_reference (dump_file
, dra
);
5002 fprintf (dump_file
, "Data ref b:\n");
5003 dump_data_reference (dump_file
, drb
);
5004 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
5006 finalize_ddr_dependent (ddr
, chrec_dont_know
);
5010 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5012 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
5013 fprintf (dump_file
, ") -> no dependence\n");
5014 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
5015 fprintf (dump_file
, ") -> dependence analysis failed\n");
5017 fprintf (dump_file
, ")\n");
5021 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5022 the data references in DATAREFS, in the LOOP_NEST. When
5023 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5024 relations. Return true when successful, i.e. data references number
5025 is small enough to be handled. */
5028 compute_all_dependences (vec
<data_reference_p
> datarefs
,
5029 vec
<ddr_p
> *dependence_relations
,
5030 vec
<loop_p
> loop_nest
,
5031 bool compute_self_and_rr
)
5033 struct data_dependence_relation
*ddr
;
5034 struct data_reference
*a
, *b
;
5037 if ((int) datarefs
.length ()
5038 > param_loop_max_datarefs_for_datadeps
)
5040 struct data_dependence_relation
*ddr
;
5042 /* Insert a single relation into dependence_relations:
5044 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
5045 dependence_relations
->safe_push (ddr
);
5049 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
5050 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
5051 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
5053 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
5054 dependence_relations
->safe_push (ddr
);
5055 if (loop_nest
.exists ())
5056 compute_affine_dependence (ddr
, loop_nest
[0]);
5059 if (compute_self_and_rr
)
5060 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
5062 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
5063 dependence_relations
->safe_push (ddr
);
5064 if (loop_nest
.exists ())
5065 compute_affine_dependence (ddr
, loop_nest
[0]);
5071 /* Describes a location of a memory reference. */
5075 /* The memory reference. */
5078 /* True if the memory reference is read. */
5081 /* True if the data reference is conditional within the containing
5082 statement, i.e. if it might not occur even when the statement
5083 is executed and runs to completion. */
5084 bool is_conditional_in_stmt
;
5088 /* Stores the locations of memory references in STMT to REFERENCES. Returns
5089 true if STMT clobbers memory, false otherwise. */
5092 get_references_in_stmt (gimple
*stmt
, vec
<data_ref_loc
, va_heap
> *references
)
5094 bool clobbers_memory
= false;
5097 enum gimple_code stmt_code
= gimple_code (stmt
);
5099 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5100 As we cannot model data-references to not spelled out
5101 accesses give up if they may occur. */
5102 if (stmt_code
== GIMPLE_CALL
5103 && !(gimple_call_flags (stmt
) & ECF_CONST
))
5105 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5106 if (gimple_call_internal_p (stmt
))
5107 switch (gimple_call_internal_fn (stmt
))
5109 case IFN_GOMP_SIMD_LANE
:
5111 class loop
*loop
= gimple_bb (stmt
)->loop_father
;
5112 tree uid
= gimple_call_arg (stmt
, 0);
5113 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
5115 || loop
->simduid
!= SSA_NAME_VAR (uid
))
5116 clobbers_memory
= true;
5120 case IFN_MASK_STORE
:
5123 clobbers_memory
= true;
5127 clobbers_memory
= true;
5129 else if (stmt_code
== GIMPLE_ASM
5130 && (gimple_asm_volatile_p (as_a
<gasm
*> (stmt
))
5131 || gimple_vuse (stmt
)))
5132 clobbers_memory
= true;
5134 if (!gimple_vuse (stmt
))
5135 return clobbers_memory
;
5137 if (stmt_code
== GIMPLE_ASSIGN
)
5140 op0
= gimple_assign_lhs (stmt
);
5141 op1
= gimple_assign_rhs1 (stmt
);
5144 || (REFERENCE_CLASS_P (op1
)
5145 && (base
= get_base_address (op1
))
5146 && TREE_CODE (base
) != SSA_NAME
5147 && !is_gimple_min_invariant (base
)))
5151 ref
.is_conditional_in_stmt
= false;
5152 references
->safe_push (ref
);
5155 else if (stmt_code
== GIMPLE_CALL
)
5161 ref
.is_read
= false;
5162 if (gimple_call_internal_p (stmt
))
5163 switch (gimple_call_internal_fn (stmt
))
5166 if (gimple_call_lhs (stmt
) == NULL_TREE
)
5170 case IFN_MASK_STORE
:
5171 ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)), 0);
5172 align
= tree_to_shwi (gimple_call_arg (stmt
, 1));
5174 type
= TREE_TYPE (gimple_call_lhs (stmt
));
5176 type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
5177 if (TYPE_ALIGN (type
) != align
)
5178 type
= build_aligned_type (type
, align
);
5179 ref
.is_conditional_in_stmt
= true;
5180 ref
.ref
= fold_build2 (MEM_REF
, type
, gimple_call_arg (stmt
, 0),
5182 references
->safe_push (ref
);
5188 op0
= gimple_call_lhs (stmt
);
5189 n
= gimple_call_num_args (stmt
);
5190 for (i
= 0; i
< n
; i
++)
5192 op1
= gimple_call_arg (stmt
, i
);
5195 || (REFERENCE_CLASS_P (op1
) && get_base_address (op1
)))
5199 ref
.is_conditional_in_stmt
= false;
5200 references
->safe_push (ref
);
5205 return clobbers_memory
;
5209 || (REFERENCE_CLASS_P (op0
) && get_base_address (op0
))))
5212 ref
.is_read
= false;
5213 ref
.is_conditional_in_stmt
= false;
5214 references
->safe_push (ref
);
5216 return clobbers_memory
;
5220 /* Returns true if the loop-nest has any data reference. */
5223 loop_nest_has_data_refs (loop_p loop
)
5225 basic_block
*bbs
= get_loop_body (loop
);
5226 auto_vec
<data_ref_loc
, 3> references
;
5228 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
5230 basic_block bb
= bbs
[i
];
5231 gimple_stmt_iterator bsi
;
5233 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5235 gimple
*stmt
= gsi_stmt (bsi
);
5236 get_references_in_stmt (stmt
, &references
);
5237 if (references
.length ())
5248 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5249 reference, returns false, otherwise returns true. NEST is the outermost
5250 loop of the loop nest in which the references should be analyzed. */
5253 find_data_references_in_stmt (class loop
*nest
, gimple
*stmt
,
5254 vec
<data_reference_p
> *datarefs
)
5257 auto_vec
<data_ref_loc
, 2> references
;
5259 data_reference_p dr
;
5261 if (get_references_in_stmt (stmt
, &references
))
5262 return opt_result::failure_at (stmt
, "statement clobbers memory: %G",
5265 FOR_EACH_VEC_ELT (references
, i
, ref
)
5267 dr
= create_data_ref (nest
? loop_preheader_edge (nest
) : NULL
,
5268 loop_containing_stmt (stmt
), ref
->ref
,
5269 stmt
, ref
->is_read
, ref
->is_conditional_in_stmt
);
5270 gcc_assert (dr
!= NULL
);
5271 datarefs
->safe_push (dr
);
5274 return opt_result::success ();
5277 /* Stores the data references in STMT to DATAREFS. If there is an
5278 unanalyzable reference, returns false, otherwise returns true.
5279 NEST is the outermost loop of the loop nest in which the references
5280 should be instantiated, LOOP is the loop in which the references
5281 should be analyzed. */
5284 graphite_find_data_references_in_stmt (edge nest
, loop_p loop
, gimple
*stmt
,
5285 vec
<data_reference_p
> *datarefs
)
5288 auto_vec
<data_ref_loc
, 2> references
;
5291 data_reference_p dr
;
5293 if (get_references_in_stmt (stmt
, &references
))
5296 FOR_EACH_VEC_ELT (references
, i
, ref
)
5298 dr
= create_data_ref (nest
, loop
, ref
->ref
, stmt
, ref
->is_read
,
5299 ref
->is_conditional_in_stmt
);
5300 gcc_assert (dr
!= NULL
);
5301 datarefs
->safe_push (dr
);
5307 /* Search the data references in LOOP, and record the information into
5308 DATAREFS. Returns chrec_dont_know when failing to analyze a
5309 difficult case, returns NULL_TREE otherwise. */
5312 find_data_references_in_bb (class loop
*loop
, basic_block bb
,
5313 vec
<data_reference_p
> *datarefs
)
5315 gimple_stmt_iterator bsi
;
5317 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5319 gimple
*stmt
= gsi_stmt (bsi
);
5321 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
5323 struct data_reference
*res
;
5324 res
= XCNEW (struct data_reference
);
5325 datarefs
->safe_push (res
);
5327 return chrec_dont_know
;
5334 /* Search the data references in LOOP, and record the information into
5335 DATAREFS. Returns chrec_dont_know when failing to analyze a
5336 difficult case, returns NULL_TREE otherwise.
5338 TODO: This function should be made smarter so that it can handle address
5339 arithmetic as if they were array accesses, etc. */
5342 find_data_references_in_loop (class loop
*loop
,
5343 vec
<data_reference_p
> *datarefs
)
5345 basic_block bb
, *bbs
;
5348 bbs
= get_loop_body_in_dom_order (loop
);
5350 for (i
= 0; i
< loop
->num_nodes
; i
++)
5354 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
5357 return chrec_dont_know
;
5365 /* Return the alignment in bytes that DRB is guaranteed to have at all
5369 dr_alignment (innermost_loop_behavior
*drb
)
5371 /* Get the alignment of BASE_ADDRESS + INIT. */
5372 unsigned int alignment
= drb
->base_alignment
;
5373 unsigned int misalignment
= (drb
->base_misalignment
5374 + TREE_INT_CST_LOW (drb
->init
));
5375 if (misalignment
!= 0)
5376 alignment
= MIN (alignment
, misalignment
& -misalignment
);
5378 /* Cap it to the alignment of OFFSET. */
5379 if (!integer_zerop (drb
->offset
))
5380 alignment
= MIN (alignment
, drb
->offset_alignment
);
5382 /* Cap it to the alignment of STEP. */
5383 if (!integer_zerop (drb
->step
))
5384 alignment
= MIN (alignment
, drb
->step_alignment
);
5389 /* If BASE is a pointer-typed SSA name, try to find the object that it
5390 is based on. Return this object X on success and store the alignment
5391 in bytes of BASE - &X in *ALIGNMENT_OUT. */
5394 get_base_for_alignment_1 (tree base
, unsigned int *alignment_out
)
5396 if (TREE_CODE (base
) != SSA_NAME
|| !POINTER_TYPE_P (TREE_TYPE (base
)))
5399 gimple
*def
= SSA_NAME_DEF_STMT (base
);
5400 base
= analyze_scalar_evolution (loop_containing_stmt (def
), base
);
5402 /* Peel chrecs and record the minimum alignment preserved by
5404 unsigned int alignment
= MAX_OFILE_ALIGNMENT
/ BITS_PER_UNIT
;
5405 while (TREE_CODE (base
) == POLYNOMIAL_CHREC
)
5407 unsigned int step_alignment
= highest_pow2_factor (CHREC_RIGHT (base
));
5408 alignment
= MIN (alignment
, step_alignment
);
5409 base
= CHREC_LEFT (base
);
5412 /* Punt if the expression is too complicated to handle. */
5413 if (tree_contains_chrecs (base
, NULL
) || !POINTER_TYPE_P (TREE_TYPE (base
)))
5416 /* The only useful cases are those for which a dereference folds to something
5417 other than an INDIRECT_REF. */
5418 tree ref_type
= TREE_TYPE (TREE_TYPE (base
));
5419 tree ref
= fold_indirect_ref_1 (UNKNOWN_LOCATION
, ref_type
, base
);
5423 /* Analyze the base to which the steps we peeled were applied. */
5424 poly_int64 bitsize
, bitpos
, bytepos
;
5426 int unsignedp
, reversep
, volatilep
;
5428 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
5429 &unsignedp
, &reversep
, &volatilep
);
5430 if (!base
|| !multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
5433 /* Restrict the alignment to that guaranteed by the offsets. */
5434 unsigned int bytepos_alignment
= known_alignment (bytepos
);
5435 if (bytepos_alignment
!= 0)
5436 alignment
= MIN (alignment
, bytepos_alignment
);
5439 unsigned int offset_alignment
= highest_pow2_factor (offset
);
5440 alignment
= MIN (alignment
, offset_alignment
);
5443 *alignment_out
= alignment
;
5447 /* Return the object whose alignment would need to be changed in order
5448 to increase the alignment of ADDR. Store the maximum achievable
5449 alignment in *MAX_ALIGNMENT. */
5452 get_base_for_alignment (tree addr
, unsigned int *max_alignment
)
5454 tree base
= get_base_for_alignment_1 (addr
, max_alignment
);
5458 if (TREE_CODE (addr
) == ADDR_EXPR
)
5459 addr
= TREE_OPERAND (addr
, 0);
5460 *max_alignment
= MAX_OFILE_ALIGNMENT
/ BITS_PER_UNIT
;
5464 /* Recursive helper function. */
5467 find_loop_nest_1 (class loop
*loop
, vec
<loop_p
> *loop_nest
)
5469 /* Inner loops of the nest should not contain siblings. Example:
5470 when there are two consecutive loops,
5481 the dependence relation cannot be captured by the distance
5486 loop_nest
->safe_push (loop
);
5488 return find_loop_nest_1 (loop
->inner
, loop_nest
);
5492 /* Return false when the LOOP is not well nested. Otherwise return
5493 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5494 contain the loops from the outermost to the innermost, as they will
5495 appear in the classic distance vector. */
5498 find_loop_nest (class loop
*loop
, vec
<loop_p
> *loop_nest
)
5500 loop_nest
->safe_push (loop
);
5502 return find_loop_nest_1 (loop
->inner
, loop_nest
);
5506 /* Returns true when the data dependences have been computed, false otherwise.
5507 Given a loop nest LOOP, the following vectors are returned:
5508 DATAREFS is initialized to all the array elements contained in this loop,
5509 DEPENDENCE_RELATIONS contains the relations between the data references.
5510 Compute read-read and self relations if
5511 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5514 compute_data_dependences_for_loop (class loop
*loop
,
5515 bool compute_self_and_read_read_dependences
,
5516 vec
<loop_p
> *loop_nest
,
5517 vec
<data_reference_p
> *datarefs
,
5518 vec
<ddr_p
> *dependence_relations
)
5522 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
5524 /* If the loop nest is not well formed, or one of the data references
5525 is not computable, give up without spending time to compute other
5528 || !find_loop_nest (loop
, loop_nest
)
5529 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
5530 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
5531 compute_self_and_read_read_dependences
))
5534 if (dump_file
&& (dump_flags
& TDF_STATS
))
5536 fprintf (dump_file
, "Dependence tester statistics:\n");
5538 fprintf (dump_file
, "Number of dependence tests: %d\n",
5539 dependence_stats
.num_dependence_tests
);
5540 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
5541 dependence_stats
.num_dependence_dependent
);
5542 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
5543 dependence_stats
.num_dependence_independent
);
5544 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
5545 dependence_stats
.num_dependence_undetermined
);
5547 fprintf (dump_file
, "Number of subscript tests: %d\n",
5548 dependence_stats
.num_subscript_tests
);
5549 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
5550 dependence_stats
.num_subscript_undetermined
);
5551 fprintf (dump_file
, "Number of same subscript function: %d\n",
5552 dependence_stats
.num_same_subscript_function
);
5554 fprintf (dump_file
, "Number of ziv tests: %d\n",
5555 dependence_stats
.num_ziv
);
5556 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
5557 dependence_stats
.num_ziv_dependent
);
5558 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
5559 dependence_stats
.num_ziv_independent
);
5560 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
5561 dependence_stats
.num_ziv_unimplemented
);
5563 fprintf (dump_file
, "Number of siv tests: %d\n",
5564 dependence_stats
.num_siv
);
5565 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
5566 dependence_stats
.num_siv_dependent
);
5567 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
5568 dependence_stats
.num_siv_independent
);
5569 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
5570 dependence_stats
.num_siv_unimplemented
);
5572 fprintf (dump_file
, "Number of miv tests: %d\n",
5573 dependence_stats
.num_miv
);
5574 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
5575 dependence_stats
.num_miv_dependent
);
5576 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
5577 dependence_stats
.num_miv_independent
);
5578 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
5579 dependence_stats
.num_miv_unimplemented
);
5585 /* Free the memory used by a data dependence relation DDR. */
5588 free_dependence_relation (struct data_dependence_relation
*ddr
)
5593 if (DDR_SUBSCRIPTS (ddr
).exists ())
5594 free_subscripts (DDR_SUBSCRIPTS (ddr
));
5595 DDR_DIST_VECTS (ddr
).release ();
5596 DDR_DIR_VECTS (ddr
).release ();
5601 /* Free the memory used by the data dependence relations from
5602 DEPENDENCE_RELATIONS. */
5605 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
5608 struct data_dependence_relation
*ddr
;
5610 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
5612 free_dependence_relation (ddr
);
5614 dependence_relations
.release ();
5617 /* Free the memory used by the data references from DATAREFS. */
5620 free_data_refs (vec
<data_reference_p
> datarefs
)
5623 struct data_reference
*dr
;
5625 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
5627 datarefs
.release ();
5630 /* Common routine implementing both dr_direction_indicator and
5631 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
5632 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5633 Return the step as the indicator otherwise. */
5636 dr_step_indicator (struct data_reference
*dr
, int useful_min
)
5638 tree step
= DR_STEP (dr
);
5642 /* Look for cases where the step is scaled by a positive constant
5643 integer, which will often be the access size. If the multiplication
5644 doesn't change the sign (due to overflow effects) then we can
5645 test the unscaled value instead. */
5646 if (TREE_CODE (step
) == MULT_EXPR
5647 && TREE_CODE (TREE_OPERAND (step
, 1)) == INTEGER_CST
5648 && tree_int_cst_sgn (TREE_OPERAND (step
, 1)) > 0)
5650 tree factor
= TREE_OPERAND (step
, 1);
5651 step
= TREE_OPERAND (step
, 0);
5653 /* Strip widening and truncating conversions as well as nops. */
5654 if (CONVERT_EXPR_P (step
)
5655 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step
, 0))))
5656 step
= TREE_OPERAND (step
, 0);
5657 tree type
= TREE_TYPE (step
);
5659 /* Get the range of step values that would not cause overflow. */
5660 widest_int minv
= (wi::to_widest (TYPE_MIN_VALUE (ssizetype
))
5661 / wi::to_widest (factor
));
5662 widest_int maxv
= (wi::to_widest (TYPE_MAX_VALUE (ssizetype
))
5663 / wi::to_widest (factor
));
5665 /* Get the range of values that the unconverted step actually has. */
5666 wide_int step_min
, step_max
;
5667 if (TREE_CODE (step
) != SSA_NAME
5668 || get_range_info (step
, &step_min
, &step_max
) != VR_RANGE
)
5670 step_min
= wi::to_wide (TYPE_MIN_VALUE (type
));
5671 step_max
= wi::to_wide (TYPE_MAX_VALUE (type
));
5674 /* Check whether the unconverted step has an acceptable range. */
5675 signop sgn
= TYPE_SIGN (type
);
5676 if (wi::les_p (minv
, widest_int::from (step_min
, sgn
))
5677 && wi::ges_p (maxv
, widest_int::from (step_max
, sgn
)))
5679 if (wi::ge_p (step_min
, useful_min
, sgn
))
5680 return ssize_int (useful_min
);
5681 else if (wi::lt_p (step_max
, 0, sgn
))
5682 return ssize_int (-1);
5684 return fold_convert (ssizetype
, step
);
5687 return DR_STEP (dr
);
5690 /* Return a value that is negative iff DR has a negative step. */
5693 dr_direction_indicator (struct data_reference
*dr
)
5695 return dr_step_indicator (dr
, 0);
5698 /* Return a value that is zero iff DR has a zero step. */
5701 dr_zero_step_indicator (struct data_reference
*dr
)
5703 return dr_step_indicator (dr
, 1);
5706 /* Return true if DR is known to have a nonnegative (but possibly zero)
5710 dr_known_forward_stride_p (struct data_reference
*dr
)
5712 tree indicator
= dr_direction_indicator (dr
);
5713 tree neg_step_val
= fold_binary (LT_EXPR
, boolean_type_node
,
5714 fold_convert (ssizetype
, indicator
),
5716 return neg_step_val
&& integer_zerop (neg_step_val
);