1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests
;
92 int num_dependence_dependent
;
93 int num_dependence_independent
;
94 int num_dependence_undetermined
;
96 int num_subscript_tests
;
97 int num_subscript_undetermined
;
98 int num_same_subscript_function
;
101 int num_ziv_independent
;
102 int num_ziv_dependent
;
103 int num_ziv_unimplemented
;
106 int num_siv_independent
;
107 int num_siv_dependent
;
108 int num_siv_unimplemented
;
111 int num_miv_independent
;
112 int num_miv_dependent
;
113 int num_miv_unimplemented
;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
117 struct data_reference
*,
118 struct data_reference
*,
120 /* Returns true iff A divides B. */
123 tree_fold_divides_p (const_tree a
, const_tree b
)
125 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
126 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
130 /* Returns true iff A divides B. */
133 int_divides_p (int a
, int b
)
135 return ((b
% a
) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
143 dump_data_references (FILE *file
, VEC (data_reference_p
, heap
) *datarefs
)
146 struct data_reference
*dr
;
148 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
149 dump_data_reference (file
, dr
);
152 /* Dump into STDERR all the data references from DATAREFS. */
155 debug_data_references (VEC (data_reference_p
, heap
) *datarefs
)
157 dump_data_references (stderr
, datarefs
);
160 /* Dump to STDERR all the dependence relations from DDRS. */
163 debug_data_dependence_relations (VEC (ddr_p
, heap
) *ddrs
)
165 dump_data_dependence_relations (stderr
, ddrs
);
168 /* Dump into FILE all the dependence relations from DDRS. */
171 dump_data_dependence_relations (FILE *file
,
172 VEC (ddr_p
, heap
) *ddrs
)
175 struct data_dependence_relation
*ddr
;
177 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
178 dump_data_dependence_relation (file
, ddr
);
181 /* Print to STDERR the data_reference DR. */
184 debug_data_reference (struct data_reference
*dr
)
186 dump_data_reference (stderr
, dr
);
189 /* Dump function for a DATA_REFERENCE structure. */
192 dump_data_reference (FILE *outf
,
193 struct data_reference
*dr
)
197 fprintf (outf
, "#(Data Ref: \n");
198 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
199 fprintf (outf
, "# stmt: ");
200 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
201 fprintf (outf
, "# ref: ");
202 print_generic_stmt (outf
, DR_REF (dr
), 0);
203 fprintf (outf
, "# base_object: ");
204 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
206 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
208 fprintf (outf
, "# Access function %d: ", i
);
209 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
211 fprintf (outf
, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
217 dump_affine_function (FILE *outf
, affine_fn fn
)
222 print_generic_expr (outf
, VEC_index (tree
, fn
, 0), TDF_SLIM
);
223 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
225 fprintf (outf
, " + ");
226 print_generic_expr (outf
, coef
, TDF_SLIM
);
227 fprintf (outf
, " * x_%u", i
);
231 /* Dumps the conflict function CF to the file OUTF. */
234 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
238 if (cf
->n
== NO_DEPENDENCE
)
239 fprintf (outf
, "no dependence\n");
240 else if (cf
->n
== NOT_KNOWN
)
241 fprintf (outf
, "not known\n");
244 for (i
= 0; i
< cf
->n
; i
++)
247 dump_affine_function (outf
, cf
->fns
[i
]);
248 fprintf (outf
, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
256 dump_subscript (FILE *outf
, struct subscript
*subscript
)
258 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
260 fprintf (outf
, "\n (subscript \n");
261 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf
, cf
);
263 if (CF_NONTRIVIAL_P (cf
))
265 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
266 fprintf (outf
, " last_conflict: ");
267 print_generic_stmt (outf
, last_iteration
, 0);
270 cf
= SUB_CONFLICTS_IN_B (subscript
);
271 fprintf (outf
, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf
, cf
);
273 if (CF_NONTRIVIAL_P (cf
))
275 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
276 fprintf (outf
, " last_conflict: ");
277 print_generic_stmt (outf
, last_iteration
, 0);
280 fprintf (outf
, " (Subscript distance: ");
281 print_generic_stmt (outf
, SUB_DISTANCE (subscript
), 0);
282 fprintf (outf
, " )\n");
283 fprintf (outf
, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
289 print_direction_vector (FILE *outf
,
295 for (eq
= 0; eq
< length
; eq
++)
297 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
303 fprintf (outf
, " +");
306 fprintf (outf
, " -");
309 fprintf (outf
, " =");
311 case dir_positive_or_equal
:
312 fprintf (outf
, " +=");
314 case dir_positive_or_negative
:
315 fprintf (outf
, " +-");
317 case dir_negative_or_equal
:
318 fprintf (outf
, " -=");
321 fprintf (outf
, " *");
324 fprintf (outf
, "indep");
328 fprintf (outf
, "\n");
331 /* Print a vector of direction vectors. */
334 print_dir_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dir_vects
,
340 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, v
)
341 print_direction_vector (outf
, v
, length
);
344 /* Print out a vector VEC of length N to OUTFILE. */
347 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
351 for (i
= 0; i
< n
; i
++)
352 fprintf (outfile
, "%3d ", vector
[i
]);
353 fprintf (outfile
, "\n");
356 /* Print a vector of distance vectors. */
359 print_dist_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dist_vects
,
365 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, v
)
366 print_lambda_vector (outf
, v
, length
);
372 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
374 dump_data_dependence_relation (stderr
, ddr
);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
380 dump_data_dependence_relation (FILE *outf
,
381 struct data_dependence_relation
*ddr
)
383 struct data_reference
*dra
, *drb
;
385 fprintf (outf
, "(Data Dep: \n");
387 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
394 dump_data_reference (outf
, dra
);
396 fprintf (outf
, " (nil)\n");
398 dump_data_reference (outf
, drb
);
400 fprintf (outf
, " (nil)\n");
402 fprintf (outf
, " (don't know)\n)\n");
408 dump_data_reference (outf
, dra
);
409 dump_data_reference (outf
, drb
);
411 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
412 fprintf (outf
, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
419 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
421 fprintf (outf
, " access_fn_A: ");
422 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
423 fprintf (outf
, " access_fn_B: ");
424 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
425 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
428 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
429 fprintf (outf
, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
)
431 fprintf (outf
, "%d ", loopi
->num
);
432 fprintf (outf
, ")\n");
434 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
436 fprintf (outf
, " distance_vector: ");
437 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
441 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
443 fprintf (outf
, " direction_vector: ");
444 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
449 fprintf (outf
, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
455 dump_data_dependence_direction (FILE *file
,
456 enum data_dependence_direction dir
)
472 case dir_positive_or_negative
:
473 fprintf (file
, "+-");
476 case dir_positive_or_equal
:
477 fprintf (file
, "+=");
480 case dir_negative_or_equal
:
481 fprintf (file
, "-=");
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
499 dump_dist_dir_vectors (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
502 struct data_dependence_relation
*ddr
;
505 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
506 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
508 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), j
, v
)
510 fprintf (file
, "DISTANCE_V (");
511 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
512 fprintf (file
, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), j
, v
)
517 fprintf (file
, "DIRECTION_V (");
518 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
519 fprintf (file
, ")\n");
523 fprintf (file
, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
529 dump_ddrs (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
532 struct data_dependence_relation
*ddr
;
534 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
535 dump_data_dependence_relation (file
, ddr
);
537 fprintf (file
, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
547 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
548 tree
*var
, tree
*off
)
552 enum tree_code ocode
= code
;
560 *var
= build_int_cst (type
, 0);
561 *off
= fold_convert (ssizetype
, op0
);
564 case POINTER_PLUS_EXPR
:
569 split_constant_offset (op0
, &var0
, &off0
);
570 split_constant_offset (op1
, &var1
, &off1
);
571 *var
= fold_build2 (code
, type
, var0
, var1
);
572 *off
= size_binop (ocode
, off0
, off1
);
576 if (TREE_CODE (op1
) != INTEGER_CST
)
579 split_constant_offset (op0
, &var0
, &off0
);
580 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
581 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
587 HOST_WIDE_INT pbitsize
, pbitpos
;
588 enum machine_mode pmode
;
589 int punsignedp
, pvolatilep
;
591 op0
= TREE_OPERAND (op0
, 0);
592 if (!handled_component_p (op0
))
595 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
596 &pmode
, &punsignedp
, &pvolatilep
, false);
598 if (pbitpos
% BITS_PER_UNIT
!= 0)
600 base
= build_fold_addr_expr (base
);
601 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
605 split_constant_offset (poffset
, &poffset
, &off1
);
606 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
607 if (POINTER_TYPE_P (TREE_TYPE (base
)))
608 base
= fold_build_pointer_plus (base
, poffset
);
610 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
611 fold_convert (TREE_TYPE (base
), poffset
));
614 var0
= fold_convert (type
, base
);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type
))
629 type
= TREE_TYPE (type
);
630 if (int_size_in_bytes (type
) < 0)
640 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
641 enum tree_code subcode
;
643 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
646 var0
= gimple_assign_rhs1 (def_stmt
);
647 subcode
= gimple_assign_rhs_code (def_stmt
);
648 var1
= gimple_assign_rhs2 (def_stmt
);
650 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype
= TREE_TYPE (op0
);
659 if ((POINTER_TYPE_P (itype
)
660 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
661 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
662 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
664 split_constant_offset (op0
, &var0
, off
);
665 *var
= fold_convert (type
, var0
);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
682 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
686 *off
= ssize_int (0);
689 if (tree_is_chrec (exp
))
692 otype
= TREE_TYPE (exp
);
693 code
= TREE_CODE (exp
);
694 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
695 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
697 *var
= fold_convert (type
, e
);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr
)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
717 if (TREE_CODE (addr
) != ADDR_EXPR
)
720 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference
*dr
)
730 gimple stmt
= DR_STMT (dr
);
731 struct loop
*loop
= loop_containing_stmt (stmt
);
732 tree ref
= DR_REF (dr
);
733 HOST_WIDE_INT pbitsize
, pbitpos
;
735 enum machine_mode pmode
;
736 int punsignedp
, pvolatilep
;
737 affine_iv base_iv
, offset_iv
;
738 tree init
, dinit
, step
;
739 bool in_loop
= (loop
&& loop
->num
);
741 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
742 fprintf (dump_file
, "analyze_innermost: ");
744 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
745 &pmode
, &punsignedp
, &pvolatilep
, false);
746 gcc_assert (base
!= NULL_TREE
);
748 if (pbitpos
% BITS_PER_UNIT
!= 0)
750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
751 fprintf (dump_file
, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base
) == MEM_REF
)
757 if (!integer_zerop (TREE_OPERAND (base
, 1)))
761 double_int moff
= mem_ref_offset (base
);
762 poffset
= double_int_to_tree (sizetype
, moff
);
765 poffset
= size_binop (PLUS_EXPR
, poffset
, TREE_OPERAND (base
, 1));
767 base
= TREE_OPERAND (base
, 0);
770 base
= build_fold_addr_expr (base
);
773 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
776 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
777 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
784 base_iv
.step
= ssize_int (0);
785 base_iv
.no_overflow
= true;
790 offset_iv
.base
= ssize_int (0);
791 offset_iv
.step
= ssize_int (0);
797 offset_iv
.base
= poffset
;
798 offset_iv
.step
= ssize_int (0);
800 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
801 poffset
, &offset_iv
, false))
803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
804 fprintf (dump_file
, "failed: evolution of offset is not"
810 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
811 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
812 init
= size_binop (PLUS_EXPR
, init
, dinit
);
813 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
814 init
= size_binop (PLUS_EXPR
, init
, dinit
);
816 step
= size_binop (PLUS_EXPR
,
817 fold_convert (ssizetype
, base_iv
.step
),
818 fold_convert (ssizetype
, offset_iv
.step
));
820 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
822 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
826 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
828 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
829 fprintf (dump_file
, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
840 VEC (tree
, heap
) *access_fns
= NULL
;
841 tree ref
= unshare_expr (DR_REF (dr
)), aref
= ref
, op
;
842 tree base
, off
, access_fn
= NULL_TREE
;
843 basic_block before_loop
= NULL
;
847 DR_BASE_OBJECT (dr
) = ref
;
848 DR_ACCESS_FNS (dr
) = NULL
;
852 before_loop
= block_before_loop (nest
);
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (aref
))
857 /* For ARRAY_REFs the base is the reference with the index replaced
859 if (TREE_CODE (aref
) == ARRAY_REF
)
861 op
= TREE_OPERAND (aref
, 1);
862 access_fn
= analyze_scalar_evolution (loop
, op
);
863 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
864 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
865 TREE_OPERAND (aref
, 1) = build_int_cst (TREE_TYPE (op
), 0);
867 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
868 into a two element array with a constant index. The base is
869 then just the immediate underlying object. */
870 else if (TREE_CODE (aref
) == REALPART_EXPR
)
872 ref
= TREE_OPERAND (ref
, 0);
873 VEC_safe_push (tree
, heap
, access_fns
, integer_zero_node
);
875 else if (TREE_CODE (aref
) == IMAGPART_EXPR
)
877 ref
= TREE_OPERAND (ref
, 0);
878 VEC_safe_push (tree
, heap
, access_fns
, integer_one_node
);
881 aref
= TREE_OPERAND (aref
, 0);
884 /* If the address operand of a MEM_REF base has an evolution in the
885 analyzed nest, add it as an additional independent access-function. */
886 if (TREE_CODE (aref
) == MEM_REF
)
888 op
= TREE_OPERAND (aref
, 0);
889 access_fn
= analyze_scalar_evolution (loop
, op
);
890 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
891 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
893 base
= initial_condition (access_fn
);
894 split_constant_offset (base
, &base
, &off
);
895 /* Fold the MEM_REF offset into the evolutions initial
896 value to make more bases comparable. */
897 if (!integer_zerop (TREE_OPERAND (aref
, 1)))
899 off
= size_binop (PLUS_EXPR
, off
,
900 fold_convert (ssizetype
,
901 TREE_OPERAND (aref
, 1)));
902 TREE_OPERAND (aref
, 1)
903 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref
, 1)), 0);
905 access_fn
= chrec_replace_initial_condition
906 (access_fn
, fold_convert (TREE_TYPE (base
), off
));
907 TREE_OPERAND (aref
, 0) = base
;
908 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
912 if (TREE_CODE (ref
) == MEM_REF
913 && TREE_CODE (TREE_OPERAND (ref
, 0)) == ADDR_EXPR
914 && integer_zerop (TREE_OPERAND (ref
, 1)))
915 ref
= TREE_OPERAND (TREE_OPERAND (ref
, 0), 0);
917 /* For canonicalization purposes we'd like to strip all outermost
918 zero-offset component-refs.
919 ??? For now simply handle zero-index array-refs. */
920 while (TREE_CODE (ref
) == ARRAY_REF
921 && integer_zerop (TREE_OPERAND (ref
, 1)))
922 ref
= TREE_OPERAND (ref
, 0);
924 DR_BASE_OBJECT (dr
) = ref
;
925 DR_ACCESS_FNS (dr
) = access_fns
;
928 /* Extracts the alias analysis information from the memory reference DR. */
931 dr_analyze_alias (struct data_reference
*dr
)
933 tree ref
= DR_REF (dr
);
934 tree base
= get_base_address (ref
), addr
;
936 if (INDIRECT_REF_P (base
)
937 || TREE_CODE (base
) == MEM_REF
)
939 addr
= TREE_OPERAND (base
, 0);
940 if (TREE_CODE (addr
) == SSA_NAME
)
941 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
945 /* Frees data reference DR. */
948 free_data_ref (data_reference_p dr
)
950 VEC_free (tree
, heap
, DR_ACCESS_FNS (dr
));
954 /* Analyzes memory reference MEMREF accessed in STMT. The reference
955 is read if IS_READ is true, write otherwise. Returns the
956 data_reference description of MEMREF. NEST is the outermost loop
957 in which the reference should be instantiated, LOOP is the loop in
958 which the data reference should be analyzed. */
960 struct data_reference
*
961 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
964 struct data_reference
*dr
;
966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
968 fprintf (dump_file
, "Creating dr for ");
969 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
970 fprintf (dump_file
, "\n");
973 dr
= XCNEW (struct data_reference
);
975 DR_REF (dr
) = memref
;
976 DR_IS_READ (dr
) = is_read
;
978 dr_analyze_innermost (dr
);
979 dr_analyze_indices (dr
, nest
, loop
);
980 dr_analyze_alias (dr
);
982 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
985 fprintf (dump_file
, "\tbase_address: ");
986 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
987 fprintf (dump_file
, "\n\toffset from base address: ");
988 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
989 fprintf (dump_file
, "\n\tconstant offset from base address: ");
990 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
991 fprintf (dump_file
, "\n\tstep: ");
992 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
993 fprintf (dump_file
, "\n\taligned to: ");
994 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
995 fprintf (dump_file
, "\n\tbase_object: ");
996 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
997 fprintf (dump_file
, "\n");
998 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1000 fprintf (dump_file
, "\tAccess function %d: ", i
);
1001 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1008 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1011 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1015 STRIP_NOPS (offset1
);
1016 STRIP_NOPS (offset2
);
1018 if (offset1
== offset2
)
1021 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1022 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1025 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1026 TREE_OPERAND (offset2
, 0));
1028 if (!res
|| !BINARY_CLASS_P (offset1
))
1031 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1032 TREE_OPERAND (offset2
, 1));
1037 /* Check if DRA and DRB have equal offsets. */
1039 dr_equal_offsets_p (struct data_reference
*dra
,
1040 struct data_reference
*drb
)
1042 tree offset1
, offset2
;
1044 offset1
= DR_OFFSET (dra
);
1045 offset2
= DR_OFFSET (drb
);
1047 return dr_equal_offsets_p1 (offset1
, offset2
);
1050 /* Returns true if FNA == FNB. */
1053 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1055 unsigned i
, n
= VEC_length (tree
, fna
);
1057 if (n
!= VEC_length (tree
, fnb
))
1060 for (i
= 0; i
< n
; i
++)
1061 if (!operand_equal_p (VEC_index (tree
, fna
, i
),
1062 VEC_index (tree
, fnb
, i
), 0))
1068 /* If all the functions in CF are the same, returns one of them,
1069 otherwise returns NULL. */
1072 common_affine_function (conflict_function
*cf
)
1077 if (!CF_NONTRIVIAL_P (cf
))
1082 for (i
= 1; i
< cf
->n
; i
++)
1083 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1089 /* Returns the base of the affine function FN. */
1092 affine_function_base (affine_fn fn
)
1094 return VEC_index (tree
, fn
, 0);
1097 /* Returns true if FN is a constant. */
1100 affine_function_constant_p (affine_fn fn
)
1105 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
1106 if (!integer_zerop (coef
))
1112 /* Returns true if FN is the zero constant function. */
1115 affine_function_zero_p (affine_fn fn
)
1117 return (integer_zerop (affine_function_base (fn
))
1118 && affine_function_constant_p (fn
));
1121 /* Returns a signed integer type with the largest precision from TA
1125 signed_type_for_types (tree ta
, tree tb
)
1127 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1128 return signed_type_for (ta
);
1130 return signed_type_for (tb
);
1133 /* Applies operation OP on affine functions FNA and FNB, and returns the
1137 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1143 if (VEC_length (tree
, fnb
) > VEC_length (tree
, fna
))
1145 n
= VEC_length (tree
, fna
);
1146 m
= VEC_length (tree
, fnb
);
1150 n
= VEC_length (tree
, fnb
);
1151 m
= VEC_length (tree
, fna
);
1154 ret
= VEC_alloc (tree
, heap
, m
);
1155 for (i
= 0; i
< n
; i
++)
1157 tree type
= signed_type_for_types (TREE_TYPE (VEC_index (tree
, fna
, i
)),
1158 TREE_TYPE (VEC_index (tree
, fnb
, i
)));
1160 VEC_quick_push (tree
, ret
,
1161 fold_build2 (op
, type
,
1162 VEC_index (tree
, fna
, i
),
1163 VEC_index (tree
, fnb
, i
)));
1166 for (; VEC_iterate (tree
, fna
, i
, coef
); i
++)
1167 VEC_quick_push (tree
, ret
,
1168 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1169 coef
, integer_zero_node
));
1170 for (; VEC_iterate (tree
, fnb
, i
, coef
); i
++)
1171 VEC_quick_push (tree
, ret
,
1172 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1173 integer_zero_node
, coef
));
1178 /* Returns the sum of affine functions FNA and FNB. */
1181 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1183 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1186 /* Returns the difference of affine functions FNA and FNB. */
1189 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1191 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1194 /* Frees affine function FN. */
1197 affine_fn_free (affine_fn fn
)
1199 VEC_free (tree
, heap
, fn
);
1202 /* Determine for each subscript in the data dependence relation DDR
1206 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1208 conflict_function
*cf_a
, *cf_b
;
1209 affine_fn fn_a
, fn_b
, diff
;
1211 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1215 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1217 struct subscript
*subscript
;
1219 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1220 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1221 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1223 fn_a
= common_affine_function (cf_a
);
1224 fn_b
= common_affine_function (cf_b
);
1227 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1230 diff
= affine_fn_minus (fn_a
, fn_b
);
1232 if (affine_function_constant_p (diff
))
1233 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1235 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1237 affine_fn_free (diff
);
1242 /* Returns the conflict function for "unknown". */
1244 static conflict_function
*
1245 conflict_fn_not_known (void)
1247 conflict_function
*fn
= XCNEW (conflict_function
);
1253 /* Returns the conflict function for "independent". */
1255 static conflict_function
*
1256 conflict_fn_no_dependence (void)
1258 conflict_function
*fn
= XCNEW (conflict_function
);
1259 fn
->n
= NO_DEPENDENCE
;
1264 /* Returns true if the address of OBJ is invariant in LOOP. */
1267 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1269 while (handled_component_p (obj
))
1271 if (TREE_CODE (obj
) == ARRAY_REF
)
1273 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1274 need to check the stride and the lower bound of the reference. */
1275 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1277 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1281 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1283 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1287 obj
= TREE_OPERAND (obj
, 0);
1290 if (!INDIRECT_REF_P (obj
)
1291 && TREE_CODE (obj
) != MEM_REF
)
1294 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1298 /* Returns false if we can prove that data references A and B do not alias,
1299 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1303 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1306 tree addr_a
= DR_BASE_OBJECT (a
);
1307 tree addr_b
= DR_BASE_OBJECT (b
);
1309 /* If we are not processing a loop nest but scalar code we
1310 do not need to care about possible cross-iteration dependences
1311 and thus can process the full original reference. Do so,
1312 similar to how loop invariant motion applies extra offset-based
1316 aff_tree off1
, off2
;
1317 double_int size1
, size2
;
1318 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1319 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1320 aff_combination_scale (&off1
, double_int_minus_one
);
1321 aff_combination_add (&off2
, &off1
);
1322 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1326 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1327 return refs_output_dependent_p (addr_a
, addr_b
);
1328 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1329 return refs_anti_dependent_p (addr_a
, addr_b
);
1330 return refs_may_alias_p (addr_a
, addr_b
);
1333 static void compute_self_dependence (struct data_dependence_relation
*);
1335 /* Initialize a data dependence relation between data accesses A and
1336 B. NB_LOOPS is the number of loops surrounding the references: the
1337 size of the classic distance/direction vectors. */
1339 static struct data_dependence_relation
*
1340 initialize_data_dependence_relation (struct data_reference
*a
,
1341 struct data_reference
*b
,
1342 VEC (loop_p
, heap
) *loop_nest
)
1344 struct data_dependence_relation
*res
;
1347 res
= XNEW (struct data_dependence_relation
);
1350 DDR_LOOP_NEST (res
) = NULL
;
1351 DDR_REVERSED_P (res
) = false;
1352 DDR_SUBSCRIPTS (res
) = NULL
;
1353 DDR_DIR_VECTS (res
) = NULL
;
1354 DDR_DIST_VECTS (res
) = NULL
;
1356 if (a
== NULL
|| b
== NULL
)
1358 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1362 /* If the data references do not alias, then they are independent. */
1363 if (!dr_may_alias_p (a
, b
, loop_nest
!= NULL
))
1365 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1369 /* When the references are exactly the same, don't spend time doing
1370 the data dependence tests, just initialize the ddr and return. */
1371 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1373 DDR_AFFINE_P (res
) = true;
1374 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1375 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1376 DDR_LOOP_NEST (res
) = loop_nest
;
1377 DDR_INNER_LOOP (res
) = 0;
1378 DDR_SELF_REFERENCE (res
) = true;
1379 compute_self_dependence (res
);
1383 /* If the references do not access the same object, we do not know
1384 whether they alias or not. */
1385 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1387 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1391 /* If the base of the object is not invariant in the loop nest, we cannot
1392 analyze it. TODO -- in fact, it would suffice to record that there may
1393 be arbitrary dependences in the loops where the base object varies. */
1395 && !object_address_invariant_in_loop_p (VEC_index (loop_p
, loop_nest
, 0),
1396 DR_BASE_OBJECT (a
)))
1398 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1402 /* If the number of dimensions of the access to not agree we can have
1403 a pointer access to a component of the array element type and an
1404 array access while the base-objects are still the same. Punt. */
1405 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1407 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1411 DDR_AFFINE_P (res
) = true;
1412 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1413 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1414 DDR_LOOP_NEST (res
) = loop_nest
;
1415 DDR_INNER_LOOP (res
) = 0;
1416 DDR_SELF_REFERENCE (res
) = false;
1418 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1420 struct subscript
*subscript
;
1422 subscript
= XNEW (struct subscript
);
1423 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1424 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1425 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1426 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1427 VEC_safe_push (subscript_p
, heap
, DDR_SUBSCRIPTS (res
), subscript
);
1433 /* Frees memory used by the conflict function F. */
1436 free_conflict_function (conflict_function
*f
)
1440 if (CF_NONTRIVIAL_P (f
))
1442 for (i
= 0; i
< f
->n
; i
++)
1443 affine_fn_free (f
->fns
[i
]);
1448 /* Frees memory used by SUBSCRIPTS. */
1451 free_subscripts (VEC (subscript_p
, heap
) *subscripts
)
1456 FOR_EACH_VEC_ELT (subscript_p
, subscripts
, i
, s
)
1458 free_conflict_function (s
->conflicting_iterations_in_a
);
1459 free_conflict_function (s
->conflicting_iterations_in_b
);
1462 VEC_free (subscript_p
, heap
, subscripts
);
1465 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1469 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1474 fprintf (dump_file
, "(dependence classified: ");
1475 print_generic_expr (dump_file
, chrec
, 0);
1476 fprintf (dump_file
, ")\n");
1479 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1480 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1481 DDR_SUBSCRIPTS (ddr
) = NULL
;
1484 /* The dependence relation DDR cannot be represented by a distance
1488 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1490 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1491 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1493 DDR_AFFINE_P (ddr
) = false;
1498 /* This section contains the classic Banerjee tests. */
1500 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1501 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1504 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1506 return (evolution_function_is_constant_p (chrec_a
)
1507 && evolution_function_is_constant_p (chrec_b
));
1510 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1511 variable, i.e., if the SIV (Single Index Variable) test is true. */
1514 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1516 if ((evolution_function_is_constant_p (chrec_a
)
1517 && evolution_function_is_univariate_p (chrec_b
))
1518 || (evolution_function_is_constant_p (chrec_b
)
1519 && evolution_function_is_univariate_p (chrec_a
)))
1522 if (evolution_function_is_univariate_p (chrec_a
)
1523 && evolution_function_is_univariate_p (chrec_b
))
1525 switch (TREE_CODE (chrec_a
))
1527 case POLYNOMIAL_CHREC
:
1528 switch (TREE_CODE (chrec_b
))
1530 case POLYNOMIAL_CHREC
:
1531 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1546 /* Creates a conflict function with N dimensions. The affine functions
1547 in each dimension follow. */
1549 static conflict_function
*
1550 conflict_fn (unsigned n
, ...)
1553 conflict_function
*ret
= XCNEW (conflict_function
);
1556 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1560 for (i
= 0; i
< n
; i
++)
1561 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1567 /* Returns constant affine function with value CST. */
1570 affine_fn_cst (tree cst
)
1572 affine_fn fn
= VEC_alloc (tree
, heap
, 1);
1573 VEC_quick_push (tree
, fn
, cst
);
1577 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1580 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1582 affine_fn fn
= VEC_alloc (tree
, heap
, dim
+ 1);
1585 gcc_assert (dim
> 0);
1586 VEC_quick_push (tree
, fn
, cst
);
1587 for (i
= 1; i
< dim
; i
++)
1588 VEC_quick_push (tree
, fn
, integer_zero_node
);
1589 VEC_quick_push (tree
, fn
, coef
);
1593 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1594 *OVERLAPS_B are initialized to the functions that describe the
1595 relation between the elements accessed twice by CHREC_A and
1596 CHREC_B. For k >= 0, the following property is verified:
1598 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1601 analyze_ziv_subscript (tree chrec_a
,
1603 conflict_function
**overlaps_a
,
1604 conflict_function
**overlaps_b
,
1605 tree
*last_conflicts
)
1607 tree type
, difference
;
1608 dependence_stats
.num_ziv
++;
1610 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1611 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1613 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1614 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1615 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1616 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1618 switch (TREE_CODE (difference
))
1621 if (integer_zerop (difference
))
1623 /* The difference is equal to zero: the accessed index
1624 overlaps for each iteration in the loop. */
1625 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1626 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1627 *last_conflicts
= chrec_dont_know
;
1628 dependence_stats
.num_ziv_dependent
++;
1632 /* The accesses do not overlap. */
1633 *overlaps_a
= conflict_fn_no_dependence ();
1634 *overlaps_b
= conflict_fn_no_dependence ();
1635 *last_conflicts
= integer_zero_node
;
1636 dependence_stats
.num_ziv_independent
++;
1641 /* We're not sure whether the indexes overlap. For the moment,
1642 conservatively answer "don't know". */
1643 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1644 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1646 *overlaps_a
= conflict_fn_not_known ();
1647 *overlaps_b
= conflict_fn_not_known ();
1648 *last_conflicts
= chrec_dont_know
;
1649 dependence_stats
.num_ziv_unimplemented
++;
1653 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1654 fprintf (dump_file
, ")\n");
1657 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1658 and only if it fits to the int type. If this is not the case, or the
1659 bound on the number of iterations of LOOP could not be derived, returns
1663 max_stmt_executions_tree (struct loop
*loop
)
1667 if (!max_stmt_executions (loop
, true, &nit
))
1668 return chrec_dont_know
;
1670 if (!double_int_fits_to_tree_p (unsigned_type_node
, nit
))
1671 return chrec_dont_know
;
1673 return double_int_to_tree (unsigned_type_node
, nit
);
1676 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1677 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1678 *OVERLAPS_B are initialized to the functions that describe the
1679 relation between the elements accessed twice by CHREC_A and
1680 CHREC_B. For k >= 0, the following property is verified:
1682 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1685 analyze_siv_subscript_cst_affine (tree chrec_a
,
1687 conflict_function
**overlaps_a
,
1688 conflict_function
**overlaps_b
,
1689 tree
*last_conflicts
)
1691 bool value0
, value1
, value2
;
1692 tree type
, difference
, tmp
;
1694 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1695 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1696 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1697 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1699 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1702 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1704 dependence_stats
.num_siv_unimplemented
++;
1705 *overlaps_a
= conflict_fn_not_known ();
1706 *overlaps_b
= conflict_fn_not_known ();
1707 *last_conflicts
= chrec_dont_know
;
1712 if (value0
== false)
1714 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1717 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1719 *overlaps_a
= conflict_fn_not_known ();
1720 *overlaps_b
= conflict_fn_not_known ();
1721 *last_conflicts
= chrec_dont_know
;
1722 dependence_stats
.num_siv_unimplemented
++;
1731 chrec_b = {10, +, 1}
1734 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1736 HOST_WIDE_INT numiter
;
1737 struct loop
*loop
= get_chrec_loop (chrec_b
);
1739 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1740 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1741 fold_build1 (ABS_EXPR
, type
, difference
),
1742 CHREC_RIGHT (chrec_b
));
1743 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1744 *last_conflicts
= integer_one_node
;
1747 /* Perform weak-zero siv test to see if overlap is
1748 outside the loop bounds. */
1749 numiter
= max_stmt_executions_int (loop
, true);
1752 && compare_tree_int (tmp
, numiter
) > 0)
1754 free_conflict_function (*overlaps_a
);
1755 free_conflict_function (*overlaps_b
);
1756 *overlaps_a
= conflict_fn_no_dependence ();
1757 *overlaps_b
= conflict_fn_no_dependence ();
1758 *last_conflicts
= integer_zero_node
;
1759 dependence_stats
.num_siv_independent
++;
1762 dependence_stats
.num_siv_dependent
++;
1766 /* When the step does not divide the difference, there are
1770 *overlaps_a
= conflict_fn_no_dependence ();
1771 *overlaps_b
= conflict_fn_no_dependence ();
1772 *last_conflicts
= integer_zero_node
;
1773 dependence_stats
.num_siv_independent
++;
1782 chrec_b = {10, +, -1}
1784 In this case, chrec_a will not overlap with chrec_b. */
1785 *overlaps_a
= conflict_fn_no_dependence ();
1786 *overlaps_b
= conflict_fn_no_dependence ();
1787 *last_conflicts
= integer_zero_node
;
1788 dependence_stats
.num_siv_independent
++;
1795 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1798 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1800 *overlaps_a
= conflict_fn_not_known ();
1801 *overlaps_b
= conflict_fn_not_known ();
1802 *last_conflicts
= chrec_dont_know
;
1803 dependence_stats
.num_siv_unimplemented
++;
1808 if (value2
== false)
1812 chrec_b = {10, +, -1}
1814 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1816 HOST_WIDE_INT numiter
;
1817 struct loop
*loop
= get_chrec_loop (chrec_b
);
1819 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1820 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1821 CHREC_RIGHT (chrec_b
));
1822 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1823 *last_conflicts
= integer_one_node
;
1825 /* Perform weak-zero siv test to see if overlap is
1826 outside the loop bounds. */
1827 numiter
= max_stmt_executions_int (loop
, true);
1830 && compare_tree_int (tmp
, numiter
) > 0)
1832 free_conflict_function (*overlaps_a
);
1833 free_conflict_function (*overlaps_b
);
1834 *overlaps_a
= conflict_fn_no_dependence ();
1835 *overlaps_b
= conflict_fn_no_dependence ();
1836 *last_conflicts
= integer_zero_node
;
1837 dependence_stats
.num_siv_independent
++;
1840 dependence_stats
.num_siv_dependent
++;
1844 /* When the step does not divide the difference, there
1848 *overlaps_a
= conflict_fn_no_dependence ();
1849 *overlaps_b
= conflict_fn_no_dependence ();
1850 *last_conflicts
= integer_zero_node
;
1851 dependence_stats
.num_siv_independent
++;
1861 In this case, chrec_a will not overlap with chrec_b. */
1862 *overlaps_a
= conflict_fn_no_dependence ();
1863 *overlaps_b
= conflict_fn_no_dependence ();
1864 *last_conflicts
= integer_zero_node
;
1865 dependence_stats
.num_siv_independent
++;
1873 /* Helper recursive function for initializing the matrix A. Returns
1874 the initial value of CHREC. */
1877 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
1881 switch (TREE_CODE (chrec
))
1883 case POLYNOMIAL_CHREC
:
1884 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
1886 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
1887 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
1893 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1894 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
1896 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
1901 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1902 return chrec_convert (chrec_type (chrec
), op
, NULL
);
1907 /* Handle ~X as -1 - X. */
1908 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1909 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
1910 build_int_cst (TREE_TYPE (chrec
), -1), op
);
1922 #define FLOOR_DIV(x,y) ((x) / (y))
1924 /* Solves the special case of the Diophantine equation:
1925 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1927 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1928 number of iterations that loops X and Y run. The overlaps will be
1929 constructed as evolutions in dimension DIM. */
1932 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
1933 affine_fn
*overlaps_a
,
1934 affine_fn
*overlaps_b
,
1935 tree
*last_conflicts
, int dim
)
1937 if (((step_a
> 0 && step_b
> 0)
1938 || (step_a
< 0 && step_b
< 0)))
1940 int step_overlaps_a
, step_overlaps_b
;
1941 int gcd_steps_a_b
, last_conflict
, tau2
;
1943 gcd_steps_a_b
= gcd (step_a
, step_b
);
1944 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
1945 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
1949 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
1950 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
1951 last_conflict
= tau2
;
1952 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
1955 *last_conflicts
= chrec_dont_know
;
1957 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
1958 build_int_cst (NULL_TREE
,
1960 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
1961 build_int_cst (NULL_TREE
,
1967 *overlaps_a
= affine_fn_cst (integer_zero_node
);
1968 *overlaps_b
= affine_fn_cst (integer_zero_node
);
1969 *last_conflicts
= integer_zero_node
;
1973 /* Solves the special case of a Diophantine equation where CHREC_A is
1974 an affine bivariate function, and CHREC_B is an affine univariate
1975 function. For example,
1977 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1979 has the following overlapping functions:
1981 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1982 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1983 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1985 FORNOW: This is a specialized implementation for a case occurring in
1986 a common benchmark. Implement the general algorithm. */
1989 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
1990 conflict_function
**overlaps_a
,
1991 conflict_function
**overlaps_b
,
1992 tree
*last_conflicts
)
1994 bool xz_p
, yz_p
, xyz_p
;
1995 int step_x
, step_y
, step_z
;
1996 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
1997 affine_fn overlaps_a_xz
, overlaps_b_xz
;
1998 affine_fn overlaps_a_yz
, overlaps_b_yz
;
1999 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2000 affine_fn ova1
, ova2
, ovb
;
2001 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2003 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2004 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2005 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2008 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)), true);
2009 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
2010 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
2012 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2015 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2017 *overlaps_a
= conflict_fn_not_known ();
2018 *overlaps_b
= conflict_fn_not_known ();
2019 *last_conflicts
= chrec_dont_know
;
2023 niter
= MIN (niter_x
, niter_z
);
2024 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2027 &last_conflicts_xz
, 1);
2028 niter
= MIN (niter_y
, niter_z
);
2029 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2032 &last_conflicts_yz
, 2);
2033 niter
= MIN (niter_x
, niter_z
);
2034 niter
= MIN (niter_y
, niter
);
2035 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2038 &last_conflicts_xyz
, 3);
2040 xz_p
= !integer_zerop (last_conflicts_xz
);
2041 yz_p
= !integer_zerop (last_conflicts_yz
);
2042 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2044 if (xz_p
|| yz_p
|| xyz_p
)
2046 ova1
= affine_fn_cst (integer_zero_node
);
2047 ova2
= affine_fn_cst (integer_zero_node
);
2048 ovb
= affine_fn_cst (integer_zero_node
);
2051 affine_fn t0
= ova1
;
2054 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2055 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2056 affine_fn_free (t0
);
2057 affine_fn_free (t2
);
2058 *last_conflicts
= last_conflicts_xz
;
2062 affine_fn t0
= ova2
;
2065 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2066 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2067 affine_fn_free (t0
);
2068 affine_fn_free (t2
);
2069 *last_conflicts
= last_conflicts_yz
;
2073 affine_fn t0
= ova1
;
2074 affine_fn t2
= ova2
;
2077 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2078 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2079 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2080 affine_fn_free (t0
);
2081 affine_fn_free (t2
);
2082 affine_fn_free (t4
);
2083 *last_conflicts
= last_conflicts_xyz
;
2085 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2086 *overlaps_b
= conflict_fn (1, ovb
);
2090 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2091 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2092 *last_conflicts
= integer_zero_node
;
2095 affine_fn_free (overlaps_a_xz
);
2096 affine_fn_free (overlaps_b_xz
);
2097 affine_fn_free (overlaps_a_yz
);
2098 affine_fn_free (overlaps_b_yz
);
2099 affine_fn_free (overlaps_a_xyz
);
2100 affine_fn_free (overlaps_b_xyz
);
2103 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2106 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2109 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2112 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2115 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2120 for (i
= 0; i
< m
; i
++)
2121 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2124 /* Store the N x N identity matrix in MAT. */
2127 lambda_matrix_id (lambda_matrix mat
, int size
)
2131 for (i
= 0; i
< size
; i
++)
2132 for (j
= 0; j
< size
; j
++)
2133 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2136 /* Return the first nonzero element of vector VEC1 between START and N.
2137 We must have START <= N. Returns N if VEC1 is the zero vector. */
2140 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2143 while (j
< n
&& vec1
[j
] == 0)
2148 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2149 R2 = R2 + CONST1 * R1. */
2152 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2159 for (i
= 0; i
< n
; i
++)
2160 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2163 /* Swap rows R1 and R2 in matrix MAT. */
2166 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2175 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2176 and store the result in VEC2. */
2179 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2180 int size
, int const1
)
2185 lambda_vector_clear (vec2
, size
);
2187 for (i
= 0; i
< size
; i
++)
2188 vec2
[i
] = const1
* vec1
[i
];
2191 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2194 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2197 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2200 /* Negate row R1 of matrix MAT which has N columns. */
2203 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2205 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2208 /* Return true if two vectors are equal. */
2211 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2214 for (i
= 0; i
< size
; i
++)
2215 if (vec1
[i
] != vec2
[i
])
2220 /* Given an M x N integer matrix A, this function determines an M x
2221 M unimodular matrix U, and an M x N echelon matrix S such that
2222 "U.A = S". This decomposition is also known as "right Hermite".
2224 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2225 Restructuring Compilers" Utpal Banerjee. */
2228 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2229 lambda_matrix S
, lambda_matrix U
)
2233 lambda_matrix_copy (A
, S
, m
, n
);
2234 lambda_matrix_id (U
, m
);
2236 for (j
= 0; j
< n
; j
++)
2238 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2241 for (i
= m
- 1; i
>= i0
; i
--)
2243 while (S
[i
][j
] != 0)
2245 int sigma
, factor
, a
, b
;
2249 sigma
= (a
* b
< 0) ? -1: 1;
2252 factor
= sigma
* (a
/ b
);
2254 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2255 lambda_matrix_row_exchange (S
, i
, i
-1);
2257 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2258 lambda_matrix_row_exchange (U
, i
, i
-1);
2265 /* Determines the overlapping elements due to accesses CHREC_A and
2266 CHREC_B, that are affine functions. This function cannot handle
2267 symbolic evolution functions, ie. when initial conditions are
2268 parameters, because it uses lambda matrices of integers. */
2271 analyze_subscript_affine_affine (tree chrec_a
,
2273 conflict_function
**overlaps_a
,
2274 conflict_function
**overlaps_b
,
2275 tree
*last_conflicts
)
2277 unsigned nb_vars_a
, nb_vars_b
, dim
;
2278 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2279 lambda_matrix A
, U
, S
;
2280 struct obstack scratch_obstack
;
2282 if (eq_evolutions_p (chrec_a
, chrec_b
))
2284 /* The accessed index overlaps for each iteration in the
2286 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2287 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2288 *last_conflicts
= chrec_dont_know
;
2291 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2292 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2294 /* For determining the initial intersection, we have to solve a
2295 Diophantine equation. This is the most time consuming part.
2297 For answering to the question: "Is there a dependence?" we have
2298 to prove that there exists a solution to the Diophantine
2299 equation, and that the solution is in the iteration domain,
2300 i.e. the solution is positive or zero, and that the solution
2301 happens before the upper bound loop.nb_iterations. Otherwise
2302 there is no dependence. This function outputs a description of
2303 the iterations that hold the intersections. */
2305 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2306 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2308 gcc_obstack_init (&scratch_obstack
);
2310 dim
= nb_vars_a
+ nb_vars_b
;
2311 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2312 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2313 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2315 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2316 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2317 gamma
= init_b
- init_a
;
2319 /* Don't do all the hard work of solving the Diophantine equation
2320 when we already know the solution: for example,
2323 | gamma = 3 - 3 = 0.
2324 Then the first overlap occurs during the first iterations:
2325 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2329 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2331 HOST_WIDE_INT step_a
, step_b
;
2332 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2335 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
2336 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
2337 niter
= MIN (niter_a
, niter_b
);
2338 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2339 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2341 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2344 *overlaps_a
= conflict_fn (1, ova
);
2345 *overlaps_b
= conflict_fn (1, ovb
);
2348 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2349 compute_overlap_steps_for_affine_1_2
2350 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2352 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2353 compute_overlap_steps_for_affine_1_2
2354 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2358 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2359 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2360 *overlaps_a
= conflict_fn_not_known ();
2361 *overlaps_b
= conflict_fn_not_known ();
2362 *last_conflicts
= chrec_dont_know
;
2364 goto end_analyze_subs_aa
;
2368 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2373 lambda_matrix_row_negate (U
, dim
, 0);
2375 gcd_alpha_beta
= S
[0][0];
2377 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2378 but that is a quite strange case. Instead of ICEing, answer
2380 if (gcd_alpha_beta
== 0)
2382 *overlaps_a
= conflict_fn_not_known ();
2383 *overlaps_b
= conflict_fn_not_known ();
2384 *last_conflicts
= chrec_dont_know
;
2385 goto end_analyze_subs_aa
;
2388 /* The classic "gcd-test". */
2389 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2391 /* The "gcd-test" has determined that there is no integer
2392 solution, i.e. there is no dependence. */
2393 *overlaps_a
= conflict_fn_no_dependence ();
2394 *overlaps_b
= conflict_fn_no_dependence ();
2395 *last_conflicts
= integer_zero_node
;
2398 /* Both access functions are univariate. This includes SIV and MIV cases. */
2399 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2401 /* Both functions should have the same evolution sign. */
2402 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2403 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2405 /* The solutions are given by:
2407 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2410 For a given integer t. Using the following variables,
2412 | i0 = u11 * gamma / gcd_alpha_beta
2413 | j0 = u12 * gamma / gcd_alpha_beta
2420 | y0 = j0 + j1 * t. */
2421 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2423 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2424 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2428 if ((i1
== 0 && i0
< 0)
2429 || (j1
== 0 && j0
< 0))
2431 /* There is no solution.
2432 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2433 falls in here, but for the moment we don't look at the
2434 upper bound of the iteration domain. */
2435 *overlaps_a
= conflict_fn_no_dependence ();
2436 *overlaps_b
= conflict_fn_no_dependence ();
2437 *last_conflicts
= integer_zero_node
;
2438 goto end_analyze_subs_aa
;
2441 if (i1
> 0 && j1
> 0)
2443 HOST_WIDE_INT niter_a
= max_stmt_executions_int
2444 (get_chrec_loop (chrec_a
), true);
2445 HOST_WIDE_INT niter_b
= max_stmt_executions_int
2446 (get_chrec_loop (chrec_b
), true);
2447 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2449 /* (X0, Y0) is a solution of the Diophantine equation:
2450 "chrec_a (X0) = chrec_b (Y0)". */
2451 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2453 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2454 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2456 /* (X1, Y1) is the smallest positive solution of the eq
2457 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2458 first conflict occurs. */
2459 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2460 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2461 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2465 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2466 FLOOR_DIV (niter
- j0
, j1
));
2467 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2469 /* If the overlap occurs outside of the bounds of the
2470 loop, there is no dependence. */
2471 if (x1
>= niter
|| y1
>= niter
)
2473 *overlaps_a
= conflict_fn_no_dependence ();
2474 *overlaps_b
= conflict_fn_no_dependence ();
2475 *last_conflicts
= integer_zero_node
;
2476 goto end_analyze_subs_aa
;
2479 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2482 *last_conflicts
= chrec_dont_know
;
2486 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2488 build_int_cst (NULL_TREE
, i1
)));
2491 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2493 build_int_cst (NULL_TREE
, j1
)));
2497 /* FIXME: For the moment, the upper bound of the
2498 iteration domain for i and j is not checked. */
2499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2500 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2501 *overlaps_a
= conflict_fn_not_known ();
2502 *overlaps_b
= conflict_fn_not_known ();
2503 *last_conflicts
= chrec_dont_know
;
2508 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2509 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2510 *overlaps_a
= conflict_fn_not_known ();
2511 *overlaps_b
= conflict_fn_not_known ();
2512 *last_conflicts
= chrec_dont_know
;
2517 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2518 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2519 *overlaps_a
= conflict_fn_not_known ();
2520 *overlaps_b
= conflict_fn_not_known ();
2521 *last_conflicts
= chrec_dont_know
;
2524 end_analyze_subs_aa
:
2525 obstack_free (&scratch_obstack
, NULL
);
2526 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2528 fprintf (dump_file
, " (overlaps_a = ");
2529 dump_conflict_function (dump_file
, *overlaps_a
);
2530 fprintf (dump_file
, ")\n (overlaps_b = ");
2531 dump_conflict_function (dump_file
, *overlaps_b
);
2532 fprintf (dump_file
, ")\n");
2533 fprintf (dump_file
, ")\n");
2537 /* Returns true when analyze_subscript_affine_affine can be used for
2538 determining the dependence relation between chrec_a and chrec_b,
2539 that contain symbols. This function modifies chrec_a and chrec_b
2540 such that the analysis result is the same, and such that they don't
2541 contain symbols, and then can safely be passed to the analyzer.
2543 Example: The analysis of the following tuples of evolutions produce
2544 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2547 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2548 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2552 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2554 tree diff
, type
, left_a
, left_b
, right_b
;
2556 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2557 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2558 /* FIXME: For the moment not handled. Might be refined later. */
2561 type
= chrec_type (*chrec_a
);
2562 left_a
= CHREC_LEFT (*chrec_a
);
2563 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2564 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2566 if (!evolution_function_is_constant_p (diff
))
2569 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2570 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2572 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2573 diff
, CHREC_RIGHT (*chrec_a
));
2574 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2575 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2576 build_int_cst (type
, 0),
2581 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2582 *OVERLAPS_B are initialized to the functions that describe the
2583 relation between the elements accessed twice by CHREC_A and
2584 CHREC_B. For k >= 0, the following property is verified:
2586 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2589 analyze_siv_subscript (tree chrec_a
,
2591 conflict_function
**overlaps_a
,
2592 conflict_function
**overlaps_b
,
2593 tree
*last_conflicts
,
2596 dependence_stats
.num_siv
++;
2598 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2599 fprintf (dump_file
, "(analyze_siv_subscript \n");
2601 if (evolution_function_is_constant_p (chrec_a
)
2602 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2603 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2604 overlaps_a
, overlaps_b
, last_conflicts
);
2606 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2607 && evolution_function_is_constant_p (chrec_b
))
2608 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2609 overlaps_b
, overlaps_a
, last_conflicts
);
2611 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2612 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2614 if (!chrec_contains_symbols (chrec_a
)
2615 && !chrec_contains_symbols (chrec_b
))
2617 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2618 overlaps_a
, overlaps_b
,
2621 if (CF_NOT_KNOWN_P (*overlaps_a
)
2622 || CF_NOT_KNOWN_P (*overlaps_b
))
2623 dependence_stats
.num_siv_unimplemented
++;
2624 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2625 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2626 dependence_stats
.num_siv_independent
++;
2628 dependence_stats
.num_siv_dependent
++;
2630 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2633 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2634 overlaps_a
, overlaps_b
,
2637 if (CF_NOT_KNOWN_P (*overlaps_a
)
2638 || CF_NOT_KNOWN_P (*overlaps_b
))
2639 dependence_stats
.num_siv_unimplemented
++;
2640 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2641 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2642 dependence_stats
.num_siv_independent
++;
2644 dependence_stats
.num_siv_dependent
++;
2647 goto siv_subscript_dontknow
;
2652 siv_subscript_dontknow
:;
2653 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2654 fprintf (dump_file
, "siv test failed: unimplemented.\n");
2655 *overlaps_a
= conflict_fn_not_known ();
2656 *overlaps_b
= conflict_fn_not_known ();
2657 *last_conflicts
= chrec_dont_know
;
2658 dependence_stats
.num_siv_unimplemented
++;
2661 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2662 fprintf (dump_file
, ")\n");
2665 /* Returns false if we can prove that the greatest common divisor of the steps
2666 of CHREC does not divide CST, false otherwise. */
2669 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2671 HOST_WIDE_INT cd
= 0, val
;
2674 if (!host_integerp (cst
, 0))
2676 val
= tree_low_cst (cst
, 0);
2678 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2680 step
= CHREC_RIGHT (chrec
);
2681 if (!host_integerp (step
, 0))
2683 cd
= gcd (cd
, tree_low_cst (step
, 0));
2684 chrec
= CHREC_LEFT (chrec
);
2687 return val
% cd
== 0;
2690 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2691 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2692 functions that describe the relation between the elements accessed
2693 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2696 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2699 analyze_miv_subscript (tree chrec_a
,
2701 conflict_function
**overlaps_a
,
2702 conflict_function
**overlaps_b
,
2703 tree
*last_conflicts
,
2704 struct loop
*loop_nest
)
2706 tree type
, difference
;
2708 dependence_stats
.num_miv
++;
2709 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2710 fprintf (dump_file
, "(analyze_miv_subscript \n");
2712 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2713 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2714 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2715 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2717 if (eq_evolutions_p (chrec_a
, chrec_b
))
2719 /* Access functions are the same: all the elements are accessed
2720 in the same order. */
2721 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2722 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2723 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2724 dependence_stats
.num_miv_dependent
++;
2727 else if (evolution_function_is_constant_p (difference
)
2728 /* For the moment, the following is verified:
2729 evolution_function_is_affine_multivariate_p (chrec_a,
2731 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2733 /* testsuite/.../ssa-chrec-33.c
2734 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2736 The difference is 1, and all the evolution steps are multiples
2737 of 2, consequently there are no overlapping elements. */
2738 *overlaps_a
= conflict_fn_no_dependence ();
2739 *overlaps_b
= conflict_fn_no_dependence ();
2740 *last_conflicts
= integer_zero_node
;
2741 dependence_stats
.num_miv_independent
++;
2744 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2745 && !chrec_contains_symbols (chrec_a
)
2746 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2747 && !chrec_contains_symbols (chrec_b
))
2749 /* testsuite/.../ssa-chrec-35.c
2750 {0, +, 1}_2 vs. {0, +, 1}_3
2751 the overlapping elements are respectively located at iterations:
2752 {0, +, 1}_x and {0, +, 1}_x,
2753 in other words, we have the equality:
2754 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2757 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2758 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2760 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2761 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2763 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2764 overlaps_a
, overlaps_b
, last_conflicts
);
2766 if (CF_NOT_KNOWN_P (*overlaps_a
)
2767 || CF_NOT_KNOWN_P (*overlaps_b
))
2768 dependence_stats
.num_miv_unimplemented
++;
2769 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2770 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2771 dependence_stats
.num_miv_independent
++;
2773 dependence_stats
.num_miv_dependent
++;
2778 /* When the analysis is too difficult, answer "don't know". */
2779 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2780 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2782 *overlaps_a
= conflict_fn_not_known ();
2783 *overlaps_b
= conflict_fn_not_known ();
2784 *last_conflicts
= chrec_dont_know
;
2785 dependence_stats
.num_miv_unimplemented
++;
2788 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2789 fprintf (dump_file
, ")\n");
2792 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2793 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2794 OVERLAP_ITERATIONS_B are initialized with two functions that
2795 describe the iterations that contain conflicting elements.
2797 Remark: For an integer k >= 0, the following equality is true:
2799 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2803 analyze_overlapping_iterations (tree chrec_a
,
2805 conflict_function
**overlap_iterations_a
,
2806 conflict_function
**overlap_iterations_b
,
2807 tree
*last_conflicts
, struct loop
*loop_nest
)
2809 unsigned int lnn
= loop_nest
->num
;
2811 dependence_stats
.num_subscript_tests
++;
2813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2815 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2816 fprintf (dump_file
, " (chrec_a = ");
2817 print_generic_expr (dump_file
, chrec_a
, 0);
2818 fprintf (dump_file
, ")\n (chrec_b = ");
2819 print_generic_expr (dump_file
, chrec_b
, 0);
2820 fprintf (dump_file
, ")\n");
2823 if (chrec_a
== NULL_TREE
2824 || chrec_b
== NULL_TREE
2825 || chrec_contains_undetermined (chrec_a
)
2826 || chrec_contains_undetermined (chrec_b
))
2828 dependence_stats
.num_subscript_undetermined
++;
2830 *overlap_iterations_a
= conflict_fn_not_known ();
2831 *overlap_iterations_b
= conflict_fn_not_known ();
2834 /* If they are the same chrec, and are affine, they overlap
2835 on every iteration. */
2836 else if (eq_evolutions_p (chrec_a
, chrec_b
)
2837 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2838 || operand_equal_p (chrec_a
, chrec_b
, 0)))
2840 dependence_stats
.num_same_subscript_function
++;
2841 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2842 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2843 *last_conflicts
= chrec_dont_know
;
2846 /* If they aren't the same, and aren't affine, we can't do anything
2848 else if ((chrec_contains_symbols (chrec_a
)
2849 || chrec_contains_symbols (chrec_b
))
2850 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2851 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
2853 dependence_stats
.num_subscript_undetermined
++;
2854 *overlap_iterations_a
= conflict_fn_not_known ();
2855 *overlap_iterations_b
= conflict_fn_not_known ();
2858 else if (ziv_subscript_p (chrec_a
, chrec_b
))
2859 analyze_ziv_subscript (chrec_a
, chrec_b
,
2860 overlap_iterations_a
, overlap_iterations_b
,
2863 else if (siv_subscript_p (chrec_a
, chrec_b
))
2864 analyze_siv_subscript (chrec_a
, chrec_b
,
2865 overlap_iterations_a
, overlap_iterations_b
,
2866 last_conflicts
, lnn
);
2869 analyze_miv_subscript (chrec_a
, chrec_b
,
2870 overlap_iterations_a
, overlap_iterations_b
,
2871 last_conflicts
, loop_nest
);
2873 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2875 fprintf (dump_file
, " (overlap_iterations_a = ");
2876 dump_conflict_function (dump_file
, *overlap_iterations_a
);
2877 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
2878 dump_conflict_function (dump_file
, *overlap_iterations_b
);
2879 fprintf (dump_file
, ")\n");
2880 fprintf (dump_file
, ")\n");
2884 /* Helper function for uniquely inserting distance vectors. */
2887 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
2892 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, v
)
2893 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
2896 VEC_safe_push (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
), dist_v
);
2899 /* Helper function for uniquely inserting direction vectors. */
2902 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
2907 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), i
, v
)
2908 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
2911 VEC_safe_push (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
), dir_v
);
2914 /* Add a distance of 1 on all the loops outer than INDEX. If we
2915 haven't yet determined a distance for this outer loop, push a new
2916 distance vector composed of the previous distance, and a distance
2917 of 1 for this outer loop. Example:
2925 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2926 save (0, 1), then we have to save (1, 0). */
2929 add_outer_distances (struct data_dependence_relation
*ddr
,
2930 lambda_vector dist_v
, int index
)
2932 /* For each outer loop where init_v is not set, the accesses are
2933 in dependence of distance 1 in the loop. */
2934 while (--index
>= 0)
2936 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2937 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
2939 save_dist_v (ddr
, save_v
);
2943 /* Return false when fail to represent the data dependence as a
2944 distance vector. INIT_B is set to true when a component has been
2945 added to the distance vector DIST_V. INDEX_CARRY is then set to
2946 the index in DIST_V that carries the dependence. */
2949 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
2950 struct data_reference
*ddr_a
,
2951 struct data_reference
*ddr_b
,
2952 lambda_vector dist_v
, bool *init_b
,
2956 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2958 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2960 tree access_fn_a
, access_fn_b
;
2961 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
2963 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2965 non_affine_dependence_relation (ddr
);
2969 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
2970 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
2972 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
2973 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
2976 int var_a
= CHREC_VARIABLE (access_fn_a
);
2977 int var_b
= CHREC_VARIABLE (access_fn_b
);
2980 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2982 non_affine_dependence_relation (ddr
);
2986 dist
= int_cst_value (SUB_DISTANCE (subscript
));
2987 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
2988 *index_carry
= MIN (index
, *index_carry
);
2990 /* This is the subscript coupling test. If we have already
2991 recorded a distance for this loop (a distance coming from
2992 another subscript), it should be the same. For example,
2993 in the following code, there is no dependence:
3000 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3002 finalize_ddr_dependent (ddr
, chrec_known
);
3006 dist_v
[index
] = dist
;
3010 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3012 /* This can be for example an affine vs. constant dependence
3013 (T[i] vs. T[3]) that is not an affine dependence and is
3014 not representable as a distance vector. */
3015 non_affine_dependence_relation (ddr
);
3023 /* Return true when the DDR contains only constant access functions. */
3026 constant_access_functions (const struct data_dependence_relation
*ddr
)
3030 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3031 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3032 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3038 /* Helper function for the case where DDR_A and DDR_B are the same
3039 multivariate access function with a constant step. For an example
3043 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3046 tree c_1
= CHREC_LEFT (c_2
);
3047 tree c_0
= CHREC_LEFT (c_1
);
3048 lambda_vector dist_v
;
3051 /* Polynomials with more than 2 variables are not handled yet. When
3052 the evolution steps are parameters, it is not possible to
3053 represent the dependence using classical distance vectors. */
3054 if (TREE_CODE (c_0
) != INTEGER_CST
3055 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3056 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3058 DDR_AFFINE_P (ddr
) = false;
3062 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3063 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3065 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3066 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3067 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3068 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3081 save_dist_v (ddr
, dist_v
);
3083 add_outer_distances (ddr
, dist_v
, x_1
);
3086 /* Helper function for the case where DDR_A and DDR_B are the same
3087 access functions. */
3090 add_other_self_distances (struct data_dependence_relation
*ddr
)
3092 lambda_vector dist_v
;
3094 int index_carry
= DDR_NB_LOOPS (ddr
);
3096 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3098 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3100 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3102 if (!evolution_function_is_univariate_p (access_fun
))
3104 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3106 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3110 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3112 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3113 add_multivariate_self_dist (ddr
, access_fun
);
3115 /* The evolution step is not constant: it varies in
3116 the outer loop, so this cannot be represented by a
3117 distance vector. For example in pr34635.c the
3118 evolution is {0, +, {0, +, 4}_1}_2. */
3119 DDR_AFFINE_P (ddr
) = false;
3124 index_carry
= MIN (index_carry
,
3125 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3126 DDR_LOOP_NEST (ddr
)));
3130 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3131 add_outer_distances (ddr
, dist_v
, index_carry
);
3135 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3137 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3139 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3140 save_dist_v (ddr
, dist_v
);
3143 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3144 is the case for example when access functions are the same and
3145 equal to a constant, as in:
3152 in which case the distance vectors are (0) and (1). */
3155 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3159 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3161 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3162 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3163 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3165 for (j
= 0; j
< ca
->n
; j
++)
3166 if (affine_function_zero_p (ca
->fns
[j
]))
3168 insert_innermost_unit_dist_vector (ddr
);
3172 for (j
= 0; j
< cb
->n
; j
++)
3173 if (affine_function_zero_p (cb
->fns
[j
]))
3175 insert_innermost_unit_dist_vector (ddr
);
3181 /* Compute the classic per loop distance vector. DDR is the data
3182 dependence relation to build a vector from. Return false when fail
3183 to represent the data dependence as a distance vector. */
3186 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3187 struct loop
*loop_nest
)
3189 bool init_b
= false;
3190 int index_carry
= DDR_NB_LOOPS (ddr
);
3191 lambda_vector dist_v
;
3193 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3196 if (same_access_functions (ddr
))
3198 /* Save the 0 vector. */
3199 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3200 save_dist_v (ddr
, dist_v
);
3202 if (constant_access_functions (ddr
))
3203 add_distance_for_zero_overlaps (ddr
);
3205 if (DDR_NB_LOOPS (ddr
) > 1)
3206 add_other_self_distances (ddr
);
3211 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3212 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3213 dist_v
, &init_b
, &index_carry
))
3216 /* Save the distance vector if we initialized one. */
3219 /* Verify a basic constraint: classic distance vectors should
3220 always be lexicographically positive.
3222 Data references are collected in the order of execution of
3223 the program, thus for the following loop
3225 | for (i = 1; i < 100; i++)
3226 | for (j = 1; j < 100; j++)
3228 | t = T[j+1][i-1]; // A
3229 | T[j][i] = t + 2; // B
3232 references are collected following the direction of the wind:
3233 A then B. The data dependence tests are performed also
3234 following this order, such that we're looking at the distance
3235 separating the elements accessed by A from the elements later
3236 accessed by B. But in this example, the distance returned by
3237 test_dep (A, B) is lexicographically negative (-1, 1), that
3238 means that the access A occurs later than B with respect to
3239 the outer loop, ie. we're actually looking upwind. In this
3240 case we solve test_dep (B, A) looking downwind to the
3241 lexicographically positive solution, that returns the
3242 distance vector (1, -1). */
3243 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3245 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3246 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3249 compute_subscript_distance (ddr
);
3250 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3251 save_v
, &init_b
, &index_carry
))
3253 save_dist_v (ddr
, save_v
);
3254 DDR_REVERSED_P (ddr
) = true;
3256 /* In this case there is a dependence forward for all the
3259 | for (k = 1; k < 100; k++)
3260 | for (i = 1; i < 100; i++)
3261 | for (j = 1; j < 100; j++)
3263 | t = T[j+1][i-1]; // A
3264 | T[j][i] = t + 2; // B
3272 if (DDR_NB_LOOPS (ddr
) > 1)
3274 add_outer_distances (ddr
, save_v
, index_carry
);
3275 add_outer_distances (ddr
, dist_v
, index_carry
);
3280 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3281 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3283 if (DDR_NB_LOOPS (ddr
) > 1)
3285 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3287 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3288 DDR_A (ddr
), loop_nest
))
3290 compute_subscript_distance (ddr
);
3291 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3292 opposite_v
, &init_b
,
3296 save_dist_v (ddr
, save_v
);
3297 add_outer_distances (ddr
, dist_v
, index_carry
);
3298 add_outer_distances (ddr
, opposite_v
, index_carry
);
3301 save_dist_v (ddr
, save_v
);
3306 /* There is a distance of 1 on all the outer loops: Example:
3307 there is a dependence of distance 1 on loop_1 for the array A.
3313 add_outer_distances (ddr
, dist_v
,
3314 lambda_vector_first_nz (dist_v
,
3315 DDR_NB_LOOPS (ddr
), 0));
3318 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3322 fprintf (dump_file
, "(build_classic_dist_vector\n");
3323 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3325 fprintf (dump_file
, " dist_vector = (");
3326 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3327 DDR_NB_LOOPS (ddr
));
3328 fprintf (dump_file
, " )\n");
3330 fprintf (dump_file
, ")\n");
3336 /* Return the direction for a given distance.
3337 FIXME: Computing dir this way is suboptimal, since dir can catch
3338 cases that dist is unable to represent. */
3340 static inline enum data_dependence_direction
3341 dir_from_dist (int dist
)
3344 return dir_positive
;
3346 return dir_negative
;
3351 /* Compute the classic per loop direction vector. DDR is the data
3352 dependence relation to build a vector from. */
3355 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3358 lambda_vector dist_v
;
3360 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
3362 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3364 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3365 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3367 save_dir_v (ddr
, dir_v
);
3371 /* Helper function. Returns true when there is a dependence between
3372 data references DRA and DRB. */
3375 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3376 struct data_reference
*dra
,
3377 struct data_reference
*drb
,
3378 struct loop
*loop_nest
)
3381 tree last_conflicts
;
3382 struct subscript
*subscript
;
3384 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
3387 conflict_function
*overlaps_a
, *overlaps_b
;
3389 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3390 DR_ACCESS_FN (drb
, i
),
3391 &overlaps_a
, &overlaps_b
,
3392 &last_conflicts
, loop_nest
);
3394 if (CF_NOT_KNOWN_P (overlaps_a
)
3395 || CF_NOT_KNOWN_P (overlaps_b
))
3397 finalize_ddr_dependent (ddr
, chrec_dont_know
);
3398 dependence_stats
.num_dependence_undetermined
++;
3399 free_conflict_function (overlaps_a
);
3400 free_conflict_function (overlaps_b
);
3404 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3405 || CF_NO_DEPENDENCE_P (overlaps_b
))
3407 finalize_ddr_dependent (ddr
, chrec_known
);
3408 dependence_stats
.num_dependence_independent
++;
3409 free_conflict_function (overlaps_a
);
3410 free_conflict_function (overlaps_b
);
3416 if (SUB_CONFLICTS_IN_A (subscript
))
3417 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3418 if (SUB_CONFLICTS_IN_B (subscript
))
3419 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3421 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3422 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3423 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3430 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3433 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3434 struct loop
*loop_nest
)
3437 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3438 fprintf (dump_file
, "(subscript_dependence_tester \n");
3440 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3441 dependence_stats
.num_dependence_dependent
++;
3443 compute_subscript_distance (ddr
);
3444 if (build_classic_dist_vector (ddr
, loop_nest
))
3445 build_classic_dir_vector (ddr
);
3447 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3448 fprintf (dump_file
, ")\n");
3451 /* Returns true when all the access functions of A are affine or
3452 constant with respect to LOOP_NEST. */
3455 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3456 const struct loop
*loop_nest
)
3459 VEC(tree
,heap
) *fns
= DR_ACCESS_FNS (a
);
3462 FOR_EACH_VEC_ELT (tree
, fns
, i
, t
)
3463 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3464 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3470 /* Initializes an equation for an OMEGA problem using the information
3471 contained in the ACCESS_FUN. Returns true when the operation
3474 PB is the omega constraint system.
3475 EQ is the number of the equation to be initialized.
3476 OFFSET is used for shifting the variables names in the constraints:
3477 a constrain is composed of 2 * the number of variables surrounding
3478 dependence accesses. OFFSET is set either to 0 for the first n variables,
3479 then it is set to n.
3480 ACCESS_FUN is expected to be an affine chrec. */
3483 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3484 unsigned int offset
, tree access_fun
,
3485 struct data_dependence_relation
*ddr
)
3487 switch (TREE_CODE (access_fun
))
3489 case POLYNOMIAL_CHREC
:
3491 tree left
= CHREC_LEFT (access_fun
);
3492 tree right
= CHREC_RIGHT (access_fun
);
3493 int var
= CHREC_VARIABLE (access_fun
);
3496 if (TREE_CODE (right
) != INTEGER_CST
)
3499 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3500 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3502 /* Compute the innermost loop index. */
3503 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3506 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3507 += int_cst_value (right
);
3509 switch (TREE_CODE (left
))
3511 case POLYNOMIAL_CHREC
:
3512 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3515 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3524 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3532 /* As explained in the comments preceding init_omega_for_ddr, we have
3533 to set up a system for each loop level, setting outer loops
3534 variation to zero, and current loop variation to positive or zero.
3535 Save each lexico positive distance vector. */
3538 omega_extract_distance_vectors (omega_pb pb
,
3539 struct data_dependence_relation
*ddr
)
3543 struct loop
*loopi
, *loopj
;
3544 enum omega_result res
;
3546 /* Set a new problem for each loop in the nest. The basis is the
3547 problem that we have initialized until now. On top of this we
3548 add new constraints. */
3549 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3550 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3553 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3554 DDR_NB_LOOPS (ddr
));
3556 omega_copy_problem (copy
, pb
);
3558 /* For all the outer loops "loop_j", add "dj = 0". */
3560 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3562 eq
= omega_add_zero_eq (copy
, omega_black
);
3563 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3566 /* For "loop_i", add "0 <= di". */
3567 geq
= omega_add_zero_geq (copy
, omega_black
);
3568 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3570 /* Reduce the constraint system, and test that the current
3571 problem is feasible. */
3572 res
= omega_simplify_problem (copy
);
3573 if (res
== omega_false
3574 || res
== omega_unknown
3575 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3578 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3579 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3581 dist
= copy
->subs
[eq
].coef
[0];
3587 /* Reinitialize problem... */
3588 omega_copy_problem (copy
, pb
);
3590 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3592 eq
= omega_add_zero_eq (copy
, omega_black
);
3593 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3596 /* ..., but this time "di = 1". */
3597 eq
= omega_add_zero_eq (copy
, omega_black
);
3598 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3599 copy
->eqs
[eq
].coef
[0] = -1;
3601 res
= omega_simplify_problem (copy
);
3602 if (res
== omega_false
3603 || res
== omega_unknown
3604 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3607 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3608 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3610 dist
= copy
->subs
[eq
].coef
[0];
3616 /* Save the lexicographically positive distance vector. */
3619 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3620 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3624 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3625 if (copy
->subs
[eq
].key
> 0)
3627 dist
= copy
->subs
[eq
].coef
[0];
3628 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3631 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3632 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3634 save_dist_v (ddr
, dist_v
);
3635 save_dir_v (ddr
, dir_v
);
3639 omega_free_problem (copy
);
3643 /* This is called for each subscript of a tuple of data references:
3644 insert an equality for representing the conflicts. */
3647 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3648 struct data_dependence_relation
*ddr
,
3649 omega_pb pb
, bool *maybe_dependent
)
3652 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3653 TREE_TYPE (access_fun_b
));
3654 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3655 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3656 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3659 /* When the fun_a - fun_b is not constant, the dependence is not
3660 captured by the classic distance vector representation. */
3661 if (TREE_CODE (difference
) != INTEGER_CST
)
3665 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3667 /* There is no dependence. */
3668 *maybe_dependent
= false;
3672 minus_one
= build_int_cst (type
, -1);
3673 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3675 eq
= omega_add_zero_eq (pb
, omega_black
);
3676 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3677 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3678 /* There is probably a dependence, but the system of
3679 constraints cannot be built: answer "don't know". */
3683 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3684 && !int_divides_p (lambda_vector_gcd
3685 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3686 2 * DDR_NB_LOOPS (ddr
)),
3687 pb
->eqs
[eq
].coef
[0]))
3689 /* There is no dependence. */
3690 *maybe_dependent
= false;
3697 /* Helper function, same as init_omega_for_ddr but specialized for
3698 data references A and B. */
3701 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3702 struct data_dependence_relation
*ddr
,
3703 omega_pb pb
, bool *maybe_dependent
)
3708 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3710 /* Insert an equality per subscript. */
3711 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3713 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3714 ddr
, pb
, maybe_dependent
))
3716 else if (*maybe_dependent
== false)
3718 /* There is no dependence. */
3719 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3724 /* Insert inequalities: constraints corresponding to the iteration
3725 domain, i.e. the loops surrounding the references "loop_x" and
3726 the distance variables "dx". The layout of the OMEGA
3727 representation is as follows:
3728 - coef[0] is the constant
3729 - coef[1..nb_loops] are the protected variables that will not be
3730 removed by the solver: the "dx"
3731 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3733 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3734 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3736 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
, true);
3739 ineq
= omega_add_zero_geq (pb
, omega_black
);
3740 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3742 /* 0 <= loop_x + dx */
3743 ineq
= omega_add_zero_geq (pb
, omega_black
);
3744 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3745 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3749 /* loop_x <= nb_iters */
3750 ineq
= omega_add_zero_geq (pb
, omega_black
);
3751 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3752 pb
->geqs
[ineq
].coef
[0] = nbi
;
3754 /* loop_x + dx <= nb_iters */
3755 ineq
= omega_add_zero_geq (pb
, omega_black
);
3756 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3757 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3758 pb
->geqs
[ineq
].coef
[0] = nbi
;
3760 /* A step "dx" bigger than nb_iters is not feasible, so
3761 add "0 <= nb_iters + dx", */
3762 ineq
= omega_add_zero_geq (pb
, omega_black
);
3763 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3764 pb
->geqs
[ineq
].coef
[0] = nbi
;
3765 /* and "dx <= nb_iters". */
3766 ineq
= omega_add_zero_geq (pb
, omega_black
);
3767 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3768 pb
->geqs
[ineq
].coef
[0] = nbi
;
3772 omega_extract_distance_vectors (pb
, ddr
);
3777 /* Sets up the Omega dependence problem for the data dependence
3778 relation DDR. Returns false when the constraint system cannot be
3779 built, ie. when the test answers "don't know". Returns true
3780 otherwise, and when independence has been proved (using one of the
3781 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3782 set MAYBE_DEPENDENT to true.
3784 Example: for setting up the dependence system corresponding to the
3785 conflicting accesses
3790 | ... A[2*j, 2*(i + j)]
3794 the following constraints come from the iteration domain:
3801 where di, dj are the distance variables. The constraints
3802 representing the conflicting elements are:
3805 i + 1 = 2 * (i + di + j + dj)
3807 For asking that the resulting distance vector (di, dj) be
3808 lexicographically positive, we insert the constraint "di >= 0". If
3809 "di = 0" in the solution, we fix that component to zero, and we
3810 look at the inner loops: we set a new problem where all the outer
3811 loop distances are zero, and fix this inner component to be
3812 positive. When one of the components is positive, we save that
3813 distance, and set a new problem where the distance on this loop is
3814 zero, searching for other distances in the inner loops. Here is
3815 the classic example that illustrates that we have to set for each
3816 inner loop a new problem:
3824 we have to save two distances (1, 0) and (0, 1).
3826 Given two array references, refA and refB, we have to set the
3827 dependence problem twice, refA vs. refB and refB vs. refA, and we
3828 cannot do a single test, as refB might occur before refA in the
3829 inner loops, and the contrary when considering outer loops: ex.
3834 | T[{1,+,1}_2][{1,+,1}_1] // refA
3835 | T[{2,+,1}_2][{0,+,1}_1] // refB
3840 refB touches the elements in T before refA, and thus for the same
3841 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3842 but for successive loop_0 iterations, we have (1, -1, 1)
3844 The Omega solver expects the distance variables ("di" in the
3845 previous example) to come first in the constraint system (as
3846 variables to be protected, or "safe" variables), the constraint
3847 system is built using the following layout:
3849 "cst | distance vars | index vars".
3853 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
3854 bool *maybe_dependent
)
3859 *maybe_dependent
= true;
3861 if (same_access_functions (ddr
))
3864 lambda_vector dir_v
;
3866 /* Save the 0 vector. */
3867 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
3868 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3869 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3870 dir_v
[j
] = dir_equal
;
3871 save_dir_v (ddr
, dir_v
);
3873 /* Save the dependences carried by outer loops. */
3874 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3875 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3877 omega_free_problem (pb
);
3881 /* Omega expects the protected variables (those that have to be kept
3882 after elimination) to appear first in the constraint system.
3883 These variables are the distance variables. In the following
3884 initialization we declare NB_LOOPS safe variables, and the total
3885 number of variables for the constraint system is 2*NB_LOOPS. */
3886 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3887 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3889 omega_free_problem (pb
);
3891 /* Stop computation if not decidable, or no dependence. */
3892 if (res
== false || *maybe_dependent
== false)
3895 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3896 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
3898 omega_free_problem (pb
);
3903 /* Return true when DDR contains the same information as that stored
3904 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3907 ddr_consistent_p (FILE *file
,
3908 struct data_dependence_relation
*ddr
,
3909 VEC (lambda_vector
, heap
) *dist_vects
,
3910 VEC (lambda_vector
, heap
) *dir_vects
)
3914 /* If dump_file is set, output there. */
3915 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3918 if (VEC_length (lambda_vector
, dist_vects
) != DDR_NUM_DIST_VECTS (ddr
))
3920 lambda_vector b_dist_v
;
3921 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3922 VEC_length (lambda_vector
, dist_vects
),
3923 DDR_NUM_DIST_VECTS (ddr
));
3925 fprintf (file
, "Banerjee dist vectors:\n");
3926 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, i
, b_dist_v
)
3927 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
3929 fprintf (file
, "Omega dist vectors:\n");
3930 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3931 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
3933 fprintf (file
, "data dependence relation:\n");
3934 dump_data_dependence_relation (file
, ddr
);
3936 fprintf (file
, ")\n");
3940 if (VEC_length (lambda_vector
, dir_vects
) != DDR_NUM_DIR_VECTS (ddr
))
3942 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3943 VEC_length (lambda_vector
, dir_vects
),
3944 DDR_NUM_DIR_VECTS (ddr
));
3948 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3950 lambda_vector a_dist_v
;
3951 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
3953 /* Distance vectors are not ordered in the same way in the DDR
3954 and in the DIST_VECTS: search for a matching vector. */
3955 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, a_dist_v
)
3956 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
3959 if (j
== VEC_length (lambda_vector
, dist_vects
))
3961 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
3962 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
3963 fprintf (file
, "not found in Omega dist vectors:\n");
3964 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3965 fprintf (file
, "data dependence relation:\n");
3966 dump_data_dependence_relation (file
, ddr
);
3967 fprintf (file
, ")\n");
3971 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
3973 lambda_vector a_dir_v
;
3974 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
3976 /* Direction vectors are not ordered in the same way in the DDR
3977 and in the DIR_VECTS: search for a matching vector. */
3978 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, a_dir_v
)
3979 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
3982 if (j
== VEC_length (lambda_vector
, dist_vects
))
3984 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
3985 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
3986 fprintf (file
, "not found in Omega dir vectors:\n");
3987 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3988 fprintf (file
, "data dependence relation:\n");
3989 dump_data_dependence_relation (file
, ddr
);
3990 fprintf (file
, ")\n");
3997 /* This computes the affine dependence relation between A and B with
3998 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3999 independence between two accesses, while CHREC_DONT_KNOW is used
4000 for representing the unknown relation.
4002 Note that it is possible to stop the computation of the dependence
4003 relation the first time we detect a CHREC_KNOWN element for a given
4007 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4008 struct loop
*loop_nest
)
4010 struct data_reference
*dra
= DDR_A (ddr
);
4011 struct data_reference
*drb
= DDR_B (ddr
);
4013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4015 fprintf (dump_file
, "(compute_affine_dependence\n");
4016 fprintf (dump_file
, " (stmt_a = \n");
4017 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, 0);
4018 fprintf (dump_file
, ")\n (stmt_b = \n");
4019 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, 0);
4020 fprintf (dump_file
, ")\n");
4023 /* Analyze only when the dependence relation is not yet known. */
4024 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
4025 && !DDR_SELF_REFERENCE (ddr
))
4027 dependence_stats
.num_dependence_tests
++;
4029 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4030 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4032 if (flag_check_data_deps
)
4034 /* Compute the dependences using the first algorithm. */
4035 subscript_dependence_tester (ddr
, loop_nest
);
4037 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4039 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4040 dump_data_dependence_relation (dump_file
, ddr
);
4043 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4045 bool maybe_dependent
;
4046 VEC (lambda_vector
, heap
) *dir_vects
, *dist_vects
;
4048 /* Save the result of the first DD analyzer. */
4049 dist_vects
= DDR_DIST_VECTS (ddr
);
4050 dir_vects
= DDR_DIR_VECTS (ddr
);
4052 /* Reset the information. */
4053 DDR_DIST_VECTS (ddr
) = NULL
;
4054 DDR_DIR_VECTS (ddr
) = NULL
;
4056 /* Compute the same information using Omega. */
4057 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4058 goto csys_dont_know
;
4060 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4062 fprintf (dump_file
, "Omega Analyzer\n");
4063 dump_data_dependence_relation (dump_file
, ddr
);
4066 /* Check that we get the same information. */
4067 if (maybe_dependent
)
4068 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4073 subscript_dependence_tester (ddr
, loop_nest
);
4076 /* As a last case, if the dependence cannot be determined, or if
4077 the dependence is considered too difficult to determine, answer
4082 dependence_stats
.num_dependence_undetermined
++;
4084 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4086 fprintf (dump_file
, "Data ref a:\n");
4087 dump_data_reference (dump_file
, dra
);
4088 fprintf (dump_file
, "Data ref b:\n");
4089 dump_data_reference (dump_file
, drb
);
4090 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4092 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4096 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4097 fprintf (dump_file
, ")\n");
4100 /* This computes the dependence relation for the same data
4101 reference into DDR. */
4104 compute_self_dependence (struct data_dependence_relation
*ddr
)
4107 struct subscript
*subscript
;
4109 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4112 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
4115 if (SUB_CONFLICTS_IN_A (subscript
))
4116 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4117 if (SUB_CONFLICTS_IN_B (subscript
))
4118 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4120 /* The accessed index overlaps for each iteration. */
4121 SUB_CONFLICTS_IN_A (subscript
)
4122 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4123 SUB_CONFLICTS_IN_B (subscript
)
4124 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4125 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
4128 /* The distance vector is the zero vector. */
4129 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4130 save_dir_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4133 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4134 the data references in DATAREFS, in the LOOP_NEST. When
4135 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4139 compute_all_dependences (VEC (data_reference_p
, heap
) *datarefs
,
4140 VEC (ddr_p
, heap
) **dependence_relations
,
4141 VEC (loop_p
, heap
) *loop_nest
,
4142 bool compute_self_and_rr
)
4144 struct data_dependence_relation
*ddr
;
4145 struct data_reference
*a
, *b
;
4148 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4149 for (j
= i
+ 1; VEC_iterate (data_reference_p
, datarefs
, j
, b
); j
++)
4150 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4152 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4153 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4155 compute_affine_dependence (ddr
, VEC_index (loop_p
, loop_nest
, 0));
4158 if (compute_self_and_rr
)
4159 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4161 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4162 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4163 compute_self_dependence (ddr
);
4167 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4168 true if STMT clobbers memory, false otherwise. */
4171 get_references_in_stmt (gimple stmt
, VEC (data_ref_loc
, heap
) **references
)
4173 bool clobbers_memory
= false;
4176 enum gimple_code stmt_code
= gimple_code (stmt
);
4180 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4181 Calls have side-effects, except those to const or pure
4183 if ((stmt_code
== GIMPLE_CALL
4184 && !(gimple_call_flags (stmt
) & (ECF_CONST
| ECF_PURE
)))
4185 || (stmt_code
== GIMPLE_ASM
4186 && gimple_asm_volatile_p (stmt
)))
4187 clobbers_memory
= true;
4189 if (!gimple_vuse (stmt
))
4190 return clobbers_memory
;
4192 if (stmt_code
== GIMPLE_ASSIGN
)
4195 op0
= gimple_assign_lhs_ptr (stmt
);
4196 op1
= gimple_assign_rhs1_ptr (stmt
);
4199 || (REFERENCE_CLASS_P (*op1
)
4200 && (base
= get_base_address (*op1
))
4201 && TREE_CODE (base
) != SSA_NAME
))
4203 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4205 ref
->is_read
= true;
4208 else if (stmt_code
== GIMPLE_CALL
)
4212 op0
= gimple_call_lhs_ptr (stmt
);
4213 n
= gimple_call_num_args (stmt
);
4214 for (i
= 0; i
< n
; i
++)
4216 op1
= gimple_call_arg_ptr (stmt
, i
);
4219 || (REFERENCE_CLASS_P (*op1
) && get_base_address (*op1
)))
4221 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4223 ref
->is_read
= true;
4228 return clobbers_memory
;
4232 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
))))
4234 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4236 ref
->is_read
= false;
4238 return clobbers_memory
;
4241 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4242 reference, returns false, otherwise returns true. NEST is the outermost
4243 loop of the loop nest in which the references should be analyzed. */
4246 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4247 VEC (data_reference_p
, heap
) **datarefs
)
4250 VEC (data_ref_loc
, heap
) *references
;
4253 data_reference_p dr
;
4255 if (get_references_in_stmt (stmt
, &references
))
4257 VEC_free (data_ref_loc
, heap
, references
);
4261 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4263 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4264 *ref
->pos
, stmt
, ref
->is_read
);
4265 gcc_assert (dr
!= NULL
);
4266 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4268 VEC_free (data_ref_loc
, heap
, references
);
4272 /* Stores the data references in STMT to DATAREFS. If there is an
4273 unanalyzable reference, returns false, otherwise returns true.
4274 NEST is the outermost loop of the loop nest in which the references
4275 should be instantiated, LOOP is the loop in which the references
4276 should be analyzed. */
4279 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4280 VEC (data_reference_p
, heap
) **datarefs
)
4283 VEC (data_ref_loc
, heap
) *references
;
4286 data_reference_p dr
;
4288 if (get_references_in_stmt (stmt
, &references
))
4290 VEC_free (data_ref_loc
, heap
, references
);
4294 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4296 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4297 gcc_assert (dr
!= NULL
);
4298 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4301 VEC_free (data_ref_loc
, heap
, references
);
4305 /* Search the data references in LOOP, and record the information into
4306 DATAREFS. Returns chrec_dont_know when failing to analyze a
4307 difficult case, returns NULL_TREE otherwise. */
4310 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4311 VEC (data_reference_p
, heap
) **datarefs
)
4313 gimple_stmt_iterator bsi
;
4315 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4317 gimple stmt
= gsi_stmt (bsi
);
4319 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4321 struct data_reference
*res
;
4322 res
= XCNEW (struct data_reference
);
4323 VEC_safe_push (data_reference_p
, heap
, *datarefs
, res
);
4325 return chrec_dont_know
;
4332 /* Search the data references in LOOP, and record the information into
4333 DATAREFS. Returns chrec_dont_know when failing to analyze a
4334 difficult case, returns NULL_TREE otherwise.
4336 TODO: This function should be made smarter so that it can handle address
4337 arithmetic as if they were array accesses, etc. */
4340 find_data_references_in_loop (struct loop
*loop
,
4341 VEC (data_reference_p
, heap
) **datarefs
)
4343 basic_block bb
, *bbs
;
4346 bbs
= get_loop_body_in_dom_order (loop
);
4348 for (i
= 0; i
< loop
->num_nodes
; i
++)
4352 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4355 return chrec_dont_know
;
4363 /* Recursive helper function. */
4366 find_loop_nest_1 (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4368 /* Inner loops of the nest should not contain siblings. Example:
4369 when there are two consecutive loops,
4380 the dependence relation cannot be captured by the distance
4385 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4387 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4391 /* Return false when the LOOP is not well nested. Otherwise return
4392 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4393 contain the loops from the outermost to the innermost, as they will
4394 appear in the classic distance vector. */
4397 find_loop_nest (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4399 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4401 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4405 /* Returns true when the data dependences have been computed, false otherwise.
4406 Given a loop nest LOOP, the following vectors are returned:
4407 DATAREFS is initialized to all the array elements contained in this loop,
4408 DEPENDENCE_RELATIONS contains the relations between the data references.
4409 Compute read-read and self relations if
4410 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4413 compute_data_dependences_for_loop (struct loop
*loop
,
4414 bool compute_self_and_read_read_dependences
,
4415 VEC (loop_p
, heap
) **loop_nest
,
4416 VEC (data_reference_p
, heap
) **datarefs
,
4417 VEC (ddr_p
, heap
) **dependence_relations
)
4421 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4423 /* If the loop nest is not well formed, or one of the data references
4424 is not computable, give up without spending time to compute other
4427 || !find_loop_nest (loop
, loop_nest
)
4428 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
)
4430 struct data_dependence_relation
*ddr
;
4432 /* Insert a single relation into dependence_relations:
4434 ddr
= initialize_data_dependence_relation (NULL
, NULL
, *loop_nest
);
4435 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4439 compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4440 compute_self_and_read_read_dependences
);
4442 if (dump_file
&& (dump_flags
& TDF_STATS
))
4444 fprintf (dump_file
, "Dependence tester statistics:\n");
4446 fprintf (dump_file
, "Number of dependence tests: %d\n",
4447 dependence_stats
.num_dependence_tests
);
4448 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4449 dependence_stats
.num_dependence_dependent
);
4450 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4451 dependence_stats
.num_dependence_independent
);
4452 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4453 dependence_stats
.num_dependence_undetermined
);
4455 fprintf (dump_file
, "Number of subscript tests: %d\n",
4456 dependence_stats
.num_subscript_tests
);
4457 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4458 dependence_stats
.num_subscript_undetermined
);
4459 fprintf (dump_file
, "Number of same subscript function: %d\n",
4460 dependence_stats
.num_same_subscript_function
);
4462 fprintf (dump_file
, "Number of ziv tests: %d\n",
4463 dependence_stats
.num_ziv
);
4464 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4465 dependence_stats
.num_ziv_dependent
);
4466 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4467 dependence_stats
.num_ziv_independent
);
4468 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4469 dependence_stats
.num_ziv_unimplemented
);
4471 fprintf (dump_file
, "Number of siv tests: %d\n",
4472 dependence_stats
.num_siv
);
4473 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4474 dependence_stats
.num_siv_dependent
);
4475 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4476 dependence_stats
.num_siv_independent
);
4477 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4478 dependence_stats
.num_siv_unimplemented
);
4480 fprintf (dump_file
, "Number of miv tests: %d\n",
4481 dependence_stats
.num_miv
);
4482 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4483 dependence_stats
.num_miv_dependent
);
4484 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4485 dependence_stats
.num_miv_independent
);
4486 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4487 dependence_stats
.num_miv_unimplemented
);
4493 /* Returns true when the data dependences for the basic block BB have been
4494 computed, false otherwise.
4495 DATAREFS is initialized to all the array elements contained in this basic
4496 block, DEPENDENCE_RELATIONS contains the relations between the data
4497 references. Compute read-read and self relations if
4498 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4500 compute_data_dependences_for_bb (basic_block bb
,
4501 bool compute_self_and_read_read_dependences
,
4502 VEC (data_reference_p
, heap
) **datarefs
,
4503 VEC (ddr_p
, heap
) **dependence_relations
)
4505 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4508 compute_all_dependences (*datarefs
, dependence_relations
, NULL
,
4509 compute_self_and_read_read_dependences
);
4513 /* Entry point (for testing only). Analyze all the data references
4514 and the dependence relations in LOOP.
4516 The data references are computed first.
4518 A relation on these nodes is represented by a complete graph. Some
4519 of the relations could be of no interest, thus the relations can be
4522 In the following function we compute all the relations. This is
4523 just a first implementation that is here for:
4524 - for showing how to ask for the dependence relations,
4525 - for the debugging the whole dependence graph,
4526 - for the dejagnu testcases and maintenance.
4528 It is possible to ask only for a part of the graph, avoiding to
4529 compute the whole dependence graph. The computed dependences are
4530 stored in a knowledge base (KB) such that later queries don't
4531 recompute the same information. The implementation of this KB is
4532 transparent to the optimizer, and thus the KB can be changed with a
4533 more efficient implementation, or the KB could be disabled. */
4535 analyze_all_data_dependences (struct loop
*loop
)
4538 int nb_data_refs
= 10;
4539 VEC (data_reference_p
, heap
) *datarefs
=
4540 VEC_alloc (data_reference_p
, heap
, nb_data_refs
);
4541 VEC (ddr_p
, heap
) *dependence_relations
=
4542 VEC_alloc (ddr_p
, heap
, nb_data_refs
* nb_data_refs
);
4543 VEC (loop_p
, heap
) *loop_nest
= VEC_alloc (loop_p
, heap
, 3);
4545 /* Compute DDs on the whole function. */
4546 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4547 &dependence_relations
);
4551 dump_data_dependence_relations (dump_file
, dependence_relations
);
4552 fprintf (dump_file
, "\n\n");
4554 if (dump_flags
& TDF_DETAILS
)
4555 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4557 if (dump_flags
& TDF_STATS
)
4559 unsigned nb_top_relations
= 0;
4560 unsigned nb_bot_relations
= 0;
4561 unsigned nb_chrec_relations
= 0;
4562 struct data_dependence_relation
*ddr
;
4564 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4566 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4569 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4573 nb_chrec_relations
++;
4576 gather_stats_on_scev_database ();
4580 VEC_free (loop_p
, heap
, loop_nest
);
4581 free_dependence_relations (dependence_relations
);
4582 free_data_refs (datarefs
);
4585 /* Computes all the data dependences and check that the results of
4586 several analyzers are the same. */
4589 tree_check_data_deps (void)
4592 struct loop
*loop_nest
;
4594 FOR_EACH_LOOP (li
, loop_nest
, 0)
4595 analyze_all_data_dependences (loop_nest
);
4598 /* Free the memory used by a data dependence relation DDR. */
4601 free_dependence_relation (struct data_dependence_relation
*ddr
)
4606 if (DDR_SUBSCRIPTS (ddr
))
4607 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4608 if (DDR_DIST_VECTS (ddr
))
4609 VEC_free (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
));
4610 if (DDR_DIR_VECTS (ddr
))
4611 VEC_free (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
));
4616 /* Free the memory used by the data dependence relations from
4617 DEPENDENCE_RELATIONS. */
4620 free_dependence_relations (VEC (ddr_p
, heap
) *dependence_relations
)
4623 struct data_dependence_relation
*ddr
;
4625 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4627 free_dependence_relation (ddr
);
4629 VEC_free (ddr_p
, heap
, dependence_relations
);
4632 /* Free the memory used by the data references from DATAREFS. */
4635 free_data_refs (VEC (data_reference_p
, heap
) *datarefs
)
4638 struct data_reference
*dr
;
4640 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
4642 VEC_free (data_reference_p
, heap
, datarefs
);
4647 /* Dump vertex I in RDG to FILE. */
4650 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
4652 struct vertex
*v
= &(rdg
->vertices
[i
]);
4653 struct graph_edge
*e
;
4655 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
4656 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
4657 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
4660 for (e
= v
->pred
; e
; e
= e
->pred_next
)
4661 fprintf (file
, " %d", e
->src
);
4663 fprintf (file
, ") (out:");
4666 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4667 fprintf (file
, " %d", e
->dest
);
4669 fprintf (file
, ")\n");
4670 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
4671 fprintf (file
, ")\n");
4674 /* Call dump_rdg_vertex on stderr. */
4677 debug_rdg_vertex (struct graph
*rdg
, int i
)
4679 dump_rdg_vertex (stderr
, rdg
, i
);
4682 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4683 dumped vertices to that bitmap. */
4685 void dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
4689 fprintf (file
, "(%d\n", c
);
4691 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4692 if (rdg
->vertices
[i
].component
== c
)
4695 bitmap_set_bit (dumped
, i
);
4697 dump_rdg_vertex (file
, rdg
, i
);
4700 fprintf (file
, ")\n");
4703 /* Call dump_rdg_vertex on stderr. */
4706 debug_rdg_component (struct graph
*rdg
, int c
)
4708 dump_rdg_component (stderr
, rdg
, c
, NULL
);
4711 /* Dump the reduced dependence graph RDG to FILE. */
4714 dump_rdg (FILE *file
, struct graph
*rdg
)
4717 bitmap dumped
= BITMAP_ALLOC (NULL
);
4719 fprintf (file
, "(rdg\n");
4721 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4722 if (!bitmap_bit_p (dumped
, i
))
4723 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
4725 fprintf (file
, ")\n");
4726 BITMAP_FREE (dumped
);
4729 /* Call dump_rdg on stderr. */
4732 debug_rdg (struct graph
*rdg
)
4734 dump_rdg (stderr
, rdg
);
4738 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
4742 fprintf (file
, "digraph RDG {\n");
4744 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4746 struct vertex
*v
= &(rdg
->vertices
[i
]);
4747 struct graph_edge
*e
;
4749 /* Highlight reads from memory. */
4750 if (RDG_MEM_READS_STMT (rdg
, i
))
4751 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
4753 /* Highlight stores to memory. */
4754 if (RDG_MEM_WRITE_STMT (rdg
, i
))
4755 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
4758 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4759 switch (RDGE_TYPE (e
))
4762 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
4766 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
4770 /* These are the most common dependences: don't print these. */
4771 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
4775 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
4783 fprintf (file
, "}\n\n");
4786 /* Display the Reduced Dependence Graph using dotty. */
4787 extern void dot_rdg (struct graph
*);
4790 dot_rdg (struct graph
*rdg
)
4792 /* When debugging, enable the following code. This cannot be used
4793 in production compilers because it calls "system". */
4795 FILE *file
= fopen ("/tmp/rdg.dot", "w");
4796 gcc_assert (file
!= NULL
);
4798 dot_rdg_1 (file
, rdg
);
4801 system ("dotty /tmp/rdg.dot &");
4803 dot_rdg_1 (stderr
, rdg
);
4807 /* This structure is used for recording the mapping statement index in
4810 struct GTY(()) rdg_vertex_info
4816 /* Returns the index of STMT in RDG. */
4819 rdg_vertex_for_stmt (struct graph
*rdg
, gimple stmt
)
4821 struct rdg_vertex_info rvi
, *slot
;
4824 slot
= (struct rdg_vertex_info
*) htab_find (rdg
->indices
, &rvi
);
4832 /* Creates an edge in RDG for each distance vector from DDR. The
4833 order that we keep track of in the RDG is the order in which
4834 statements have to be executed. */
4837 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
4839 struct graph_edge
*e
;
4841 data_reference_p dra
= DDR_A (ddr
);
4842 data_reference_p drb
= DDR_B (ddr
);
4843 unsigned level
= ddr_dependence_level (ddr
);
4845 /* For non scalar dependences, when the dependence is REVERSED,
4846 statement B has to be executed before statement A. */
4848 && !DDR_REVERSED_P (ddr
))
4850 data_reference_p tmp
= dra
;
4855 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
4856 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
4858 if (va
< 0 || vb
< 0)
4861 e
= add_edge (rdg
, va
, vb
);
4862 e
->data
= XNEW (struct rdg_edge
);
4864 RDGE_LEVEL (e
) = level
;
4865 RDGE_RELATION (e
) = ddr
;
4867 /* Determines the type of the data dependence. */
4868 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
4869 RDGE_TYPE (e
) = input_dd
;
4870 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
4871 RDGE_TYPE (e
) = output_dd
;
4872 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
4873 RDGE_TYPE (e
) = flow_dd
;
4874 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
4875 RDGE_TYPE (e
) = anti_dd
;
4878 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4879 the index of DEF in RDG. */
4882 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
4884 use_operand_p imm_use_p
;
4885 imm_use_iterator iterator
;
4887 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
4889 struct graph_edge
*e
;
4890 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
4895 e
= add_edge (rdg
, idef
, use
);
4896 e
->data
= XNEW (struct rdg_edge
);
4897 RDGE_TYPE (e
) = flow_dd
;
4898 RDGE_RELATION (e
) = NULL
;
4902 /* Creates the edges of the reduced dependence graph RDG. */
4905 create_rdg_edges (struct graph
*rdg
, VEC (ddr_p
, heap
) *ddrs
)
4908 struct data_dependence_relation
*ddr
;
4909 def_operand_p def_p
;
4912 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
4913 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4914 create_rdg_edge_for_ddr (rdg
, ddr
);
4916 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4917 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
4919 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
4922 /* Build the vertices of the reduced dependence graph RDG. */
4925 create_rdg_vertices (struct graph
*rdg
, VEC (gimple
, heap
) *stmts
)
4930 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
4932 VEC (data_ref_loc
, heap
) *references
;
4934 struct vertex
*v
= &(rdg
->vertices
[i
]);
4935 struct rdg_vertex_info
*rvi
= XNEW (struct rdg_vertex_info
);
4936 struct rdg_vertex_info
**slot
;
4940 slot
= (struct rdg_vertex_info
**) htab_find_slot (rdg
->indices
, rvi
, INSERT
);
4947 v
->data
= XNEW (struct rdg_vertex
);
4948 RDG_STMT (rdg
, i
) = stmt
;
4950 RDG_MEM_WRITE_STMT (rdg
, i
) = false;
4951 RDG_MEM_READS_STMT (rdg
, i
) = false;
4952 if (gimple_code (stmt
) == GIMPLE_PHI
)
4955 get_references_in_stmt (stmt
, &references
);
4956 FOR_EACH_VEC_ELT (data_ref_loc
, references
, j
, ref
)
4958 RDG_MEM_WRITE_STMT (rdg
, i
) = true;
4960 RDG_MEM_READS_STMT (rdg
, i
) = true;
4962 VEC_free (data_ref_loc
, heap
, references
);
4966 /* Initialize STMTS with all the statements of LOOP. When
4967 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4968 which we discover statements is important as
4969 generate_loops_for_partition is using the same traversal for
4970 identifying statements. */
4973 stmts_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
4976 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
4978 for (i
= 0; i
< loop
->num_nodes
; i
++)
4980 basic_block bb
= bbs
[i
];
4981 gimple_stmt_iterator bsi
;
4984 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4985 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
4987 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4989 stmt
= gsi_stmt (bsi
);
4990 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
4991 VEC_safe_push (gimple
, heap
, *stmts
, stmt
);
4998 /* Returns true when all the dependences are computable. */
5001 known_dependences_p (VEC (ddr_p
, heap
) *dependence_relations
)
5006 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
5007 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
5013 /* Computes a hash function for element ELT. */
5016 hash_stmt_vertex_info (const void *elt
)
5018 const struct rdg_vertex_info
*const rvi
=
5019 (const struct rdg_vertex_info
*) elt
;
5020 gimple stmt
= rvi
->stmt
;
5022 return htab_hash_pointer (stmt
);
5025 /* Compares database elements E1 and E2. */
5028 eq_stmt_vertex_info (const void *e1
, const void *e2
)
5030 const struct rdg_vertex_info
*elt1
= (const struct rdg_vertex_info
*) e1
;
5031 const struct rdg_vertex_info
*elt2
= (const struct rdg_vertex_info
*) e2
;
5033 return elt1
->stmt
== elt2
->stmt
;
5036 /* Free the element E. */
5039 hash_stmt_vertex_del (void *e
)
5044 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5045 statement of the loop nest, and one edge per data dependence or
5046 scalar dependence. */
5049 build_empty_rdg (int n_stmts
)
5051 int nb_data_refs
= 10;
5052 struct graph
*rdg
= new_graph (n_stmts
);
5054 rdg
->indices
= htab_create (nb_data_refs
, hash_stmt_vertex_info
,
5055 eq_stmt_vertex_info
, hash_stmt_vertex_del
);
5059 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5060 statement of the loop nest, and one edge per data dependence or
5061 scalar dependence. */
5064 build_rdg (struct loop
*loop
,
5065 VEC (loop_p
, heap
) **loop_nest
,
5066 VEC (ddr_p
, heap
) **dependence_relations
,
5067 VEC (data_reference_p
, heap
) **datarefs
)
5069 struct graph
*rdg
= NULL
;
5070 VEC (gimple
, heap
) *stmts
= VEC_alloc (gimple
, heap
, 10);
5072 compute_data_dependences_for_loop (loop
, false, loop_nest
, datarefs
,
5073 dependence_relations
);
5075 if (known_dependences_p (*dependence_relations
))
5077 stmts_from_loop (loop
, &stmts
);
5078 rdg
= build_empty_rdg (VEC_length (gimple
, stmts
));
5079 create_rdg_vertices (rdg
, stmts
);
5080 create_rdg_edges (rdg
, *dependence_relations
);
5083 VEC_free (gimple
, heap
, stmts
);
5087 /* Free the reduced dependence graph RDG. */
5090 free_rdg (struct graph
*rdg
)
5094 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5096 struct vertex
*v
= &(rdg
->vertices
[i
]);
5097 struct graph_edge
*e
;
5099 for (e
= v
->succ
; e
; e
= e
->succ_next
)
5105 htab_delete (rdg
->indices
);
5109 /* Initialize STMTS with all the statements of LOOP that contain a
5113 stores_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5116 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5118 for (i
= 0; i
< loop
->num_nodes
; i
++)
5120 basic_block bb
= bbs
[i
];
5121 gimple_stmt_iterator bsi
;
5123 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5124 if (gimple_vdef (gsi_stmt (bsi
)))
5125 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
5131 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5132 that contains a data reference on its LHS with a stride of the same
5133 size as its unit type. */
5136 stmt_with_adjacent_zero_store_dr_p (gimple stmt
)
5140 struct data_reference
*dr
;
5143 || !gimple_vdef (stmt
)
5144 || !is_gimple_assign (stmt
)
5145 || !gimple_assign_single_p (stmt
)
5146 || !(op1
= gimple_assign_rhs1 (stmt
))
5147 || !(integer_zerop (op1
) || real_zerop (op1
)))
5150 dr
= XCNEW (struct data_reference
);
5151 op0
= gimple_assign_lhs (stmt
);
5153 DR_STMT (dr
) = stmt
;
5156 res
= dr_analyze_innermost (dr
)
5157 && stride_of_unit_type_p (DR_STEP (dr
), TREE_TYPE (op0
));
5163 /* Initialize STMTS with all the statements of LOOP that contain a
5164 store to memory of the form "A[i] = 0". */
5167 stores_zero_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5171 gimple_stmt_iterator si
;
5173 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5175 for (i
= 0; i
< loop
->num_nodes
; i
++)
5176 for (bb
= bbs
[i
], si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5177 if ((stmt
= gsi_stmt (si
))
5178 && stmt_with_adjacent_zero_store_dr_p (stmt
))
5179 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (si
));
5184 /* For a data reference REF, return the declaration of its base
5185 address or NULL_TREE if the base is not determined. */
5188 ref_base_address (gimple stmt
, data_ref_loc
*ref
)
5190 tree base
= NULL_TREE
;
5192 struct data_reference
*dr
= XCNEW (struct data_reference
);
5194 DR_STMT (dr
) = stmt
;
5195 DR_REF (dr
) = *ref
->pos
;
5196 dr_analyze_innermost (dr
);
5197 base_address
= DR_BASE_ADDRESS (dr
);
5202 switch (TREE_CODE (base_address
))
5205 base
= TREE_OPERAND (base_address
, 0);
5209 base
= base_address
;
5218 /* Determines whether the statement from vertex V of the RDG has a
5219 definition used outside the loop that contains this statement. */
5222 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
5224 gimple stmt
= RDG_STMT (rdg
, v
);
5225 struct loop
*loop
= loop_containing_stmt (stmt
);
5226 use_operand_p imm_use_p
;
5227 imm_use_iterator iterator
;
5229 def_operand_p def_p
;
5234 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
5236 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
5238 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)
5246 /* Determines whether statements S1 and S2 access to similar memory
5247 locations. Two memory accesses are considered similar when they
5248 have the same base address declaration, i.e. when their
5249 ref_base_address is the same. */
5252 have_similar_memory_accesses (gimple s1
, gimple s2
)
5256 VEC (data_ref_loc
, heap
) *refs1
, *refs2
;
5257 data_ref_loc
*ref1
, *ref2
;
5259 get_references_in_stmt (s1
, &refs1
);
5260 get_references_in_stmt (s2
, &refs2
);
5262 FOR_EACH_VEC_ELT (data_ref_loc
, refs1
, i
, ref1
)
5264 tree base1
= ref_base_address (s1
, ref1
);
5267 FOR_EACH_VEC_ELT (data_ref_loc
, refs2
, j
, ref2
)
5268 if (base1
== ref_base_address (s2
, ref2
))
5276 VEC_free (data_ref_loc
, heap
, refs1
);
5277 VEC_free (data_ref_loc
, heap
, refs2
);
5281 /* Helper function for the hashtab. */
5284 have_similar_memory_accesses_1 (const void *s1
, const void *s2
)
5286 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple
) s1
),
5287 CONST_CAST_GIMPLE ((const_gimple
) s2
));
5290 /* Helper function for the hashtab. */
5293 ref_base_address_1 (const void *s
)
5295 gimple stmt
= CONST_CAST_GIMPLE ((const_gimple
) s
);
5297 VEC (data_ref_loc
, heap
) *refs
;
5301 get_references_in_stmt (stmt
, &refs
);
5303 FOR_EACH_VEC_ELT (data_ref_loc
, refs
, i
, ref
)
5306 res
= htab_hash_pointer (ref_base_address (stmt
, ref
));
5310 VEC_free (data_ref_loc
, heap
, refs
);
5314 /* Try to remove duplicated write data references from STMTS. */
5317 remove_similar_memory_refs (VEC (gimple
, heap
) **stmts
)
5321 htab_t seen
= htab_create (VEC_length (gimple
, *stmts
), ref_base_address_1
,
5322 have_similar_memory_accesses_1
, NULL
);
5324 for (i
= 0; VEC_iterate (gimple
, *stmts
, i
, stmt
); )
5328 slot
= htab_find_slot (seen
, stmt
, INSERT
);
5331 VEC_ordered_remove (gimple
, *stmts
, i
);
5334 *slot
= (void *) stmt
;
5342 /* Returns the index of PARAMETER in the parameters vector of the
5343 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5346 access_matrix_get_index_for_parameter (tree parameter
,
5347 struct access_matrix
*access_matrix
)
5350 VEC (tree
,heap
) *lambda_parameters
= AM_PARAMETERS (access_matrix
);
5351 tree lambda_parameter
;
5353 FOR_EACH_VEC_ELT (tree
, lambda_parameters
, i
, lambda_parameter
)
5354 if (lambda_parameter
== parameter
)
5355 return i
+ AM_NB_INDUCTION_VARS (access_matrix
);