]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
Merge from trunk.
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tree.h"
80 #include "expr.h"
81 #include "gimple-pretty-print.h"
82 #include "gimple.h"
83 #include "gimple-iterator.h"
84 #include "tree-ssa-loop-niter.h"
85 #include "tree-ssa-loop.h"
86 #include "tree-ssa.h"
87 #include "cfgloop.h"
88 #include "tree-data-ref.h"
89 #include "tree-scalar-evolution.h"
90 #include "dumpfile.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "params.h"
94
95 static struct datadep_stats
96 {
97 int num_dependence_tests;
98 int num_dependence_dependent;
99 int num_dependence_independent;
100 int num_dependence_undetermined;
101
102 int num_subscript_tests;
103 int num_subscript_undetermined;
104 int num_same_subscript_function;
105
106 int num_ziv;
107 int num_ziv_independent;
108 int num_ziv_dependent;
109 int num_ziv_unimplemented;
110
111 int num_siv;
112 int num_siv_independent;
113 int num_siv_dependent;
114 int num_siv_unimplemented;
115
116 int num_miv;
117 int num_miv_independent;
118 int num_miv_dependent;
119 int num_miv_unimplemented;
120 } dependence_stats;
121
122 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
123 struct data_reference *,
124 struct data_reference *,
125 struct loop *);
126 /* Returns true iff A divides B. */
127
128 static inline bool
129 tree_fold_divides_p (const_tree a, const_tree b)
130 {
131 gcc_assert (TREE_CODE (a) == INTEGER_CST);
132 gcc_assert (TREE_CODE (b) == INTEGER_CST);
133 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
134 }
135
136 /* Returns true iff A divides B. */
137
138 static inline bool
139 int_divides_p (int a, int b)
140 {
141 return ((b % a) == 0);
142 }
143
144 \f
145
146 /* Dump into FILE all the data references from DATAREFS. */
147
148 static void
149 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
150 {
151 unsigned int i;
152 struct data_reference *dr;
153
154 FOR_EACH_VEC_ELT (datarefs, i, dr)
155 dump_data_reference (file, dr);
156 }
157
158 /* Unified dump into FILE all the data references from DATAREFS. */
159
160 DEBUG_FUNCTION void
161 debug (vec<data_reference_p> &ref)
162 {
163 dump_data_references (stderr, ref);
164 }
165
166 DEBUG_FUNCTION void
167 debug (vec<data_reference_p> *ptr)
168 {
169 if (ptr)
170 debug (*ptr);
171 else
172 fprintf (stderr, "<nil>\n");
173 }
174
175
176 /* Dump into STDERR all the data references from DATAREFS. */
177
178 DEBUG_FUNCTION void
179 debug_data_references (vec<data_reference_p> datarefs)
180 {
181 dump_data_references (stderr, datarefs);
182 }
183
184 /* Print to STDERR the data_reference DR. */
185
186 DEBUG_FUNCTION void
187 debug_data_reference (struct data_reference *dr)
188 {
189 dump_data_reference (stderr, dr);
190 }
191
192 /* Dump function for a DATA_REFERENCE structure. */
193
194 void
195 dump_data_reference (FILE *outf,
196 struct data_reference *dr)
197 {
198 unsigned int i;
199
200 fprintf (outf, "#(Data Ref: \n");
201 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
202 fprintf (outf, "# stmt: ");
203 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
204 fprintf (outf, "# ref: ");
205 print_generic_stmt (outf, DR_REF (dr), 0);
206 fprintf (outf, "# base_object: ");
207 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
208
209 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
210 {
211 fprintf (outf, "# Access function %d: ", i);
212 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
213 }
214 fprintf (outf, "#)\n");
215 }
216
217 /* Unified dump function for a DATA_REFERENCE structure. */
218
219 DEBUG_FUNCTION void
220 debug (data_reference &ref)
221 {
222 dump_data_reference (stderr, &ref);
223 }
224
225 DEBUG_FUNCTION void
226 debug (data_reference *ptr)
227 {
228 if (ptr)
229 debug (*ptr);
230 else
231 fprintf (stderr, "<nil>\n");
232 }
233
234
235 /* Dumps the affine function described by FN to the file OUTF. */
236
237 static void
238 dump_affine_function (FILE *outf, affine_fn fn)
239 {
240 unsigned i;
241 tree coef;
242
243 print_generic_expr (outf, fn[0], TDF_SLIM);
244 for (i = 1; fn.iterate (i, &coef); i++)
245 {
246 fprintf (outf, " + ");
247 print_generic_expr (outf, coef, TDF_SLIM);
248 fprintf (outf, " * x_%u", i);
249 }
250 }
251
252 /* Dumps the conflict function CF to the file OUTF. */
253
254 static void
255 dump_conflict_function (FILE *outf, conflict_function *cf)
256 {
257 unsigned i;
258
259 if (cf->n == NO_DEPENDENCE)
260 fprintf (outf, "no dependence");
261 else if (cf->n == NOT_KNOWN)
262 fprintf (outf, "not known");
263 else
264 {
265 for (i = 0; i < cf->n; i++)
266 {
267 if (i != 0)
268 fprintf (outf, " ");
269 fprintf (outf, "[");
270 dump_affine_function (outf, cf->fns[i]);
271 fprintf (outf, "]");
272 }
273 }
274 }
275
276 /* Dump function for a SUBSCRIPT structure. */
277
278 static void
279 dump_subscript (FILE *outf, struct subscript *subscript)
280 {
281 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
282
283 fprintf (outf, "\n (subscript \n");
284 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
285 dump_conflict_function (outf, cf);
286 if (CF_NONTRIVIAL_P (cf))
287 {
288 tree last_iteration = SUB_LAST_CONFLICT (subscript);
289 fprintf (outf, "\n last_conflict: ");
290 print_generic_expr (outf, last_iteration, 0);
291 }
292
293 cf = SUB_CONFLICTS_IN_B (subscript);
294 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
295 dump_conflict_function (outf, cf);
296 if (CF_NONTRIVIAL_P (cf))
297 {
298 tree last_iteration = SUB_LAST_CONFLICT (subscript);
299 fprintf (outf, "\n last_conflict: ");
300 print_generic_expr (outf, last_iteration, 0);
301 }
302
303 fprintf (outf, "\n (Subscript distance: ");
304 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
305 fprintf (outf, " ))\n");
306 }
307
308 /* Print the classic direction vector DIRV to OUTF. */
309
310 static void
311 print_direction_vector (FILE *outf,
312 lambda_vector dirv,
313 int length)
314 {
315 int eq;
316
317 for (eq = 0; eq < length; eq++)
318 {
319 enum data_dependence_direction dir = ((enum data_dependence_direction)
320 dirv[eq]);
321
322 switch (dir)
323 {
324 case dir_positive:
325 fprintf (outf, " +");
326 break;
327 case dir_negative:
328 fprintf (outf, " -");
329 break;
330 case dir_equal:
331 fprintf (outf, " =");
332 break;
333 case dir_positive_or_equal:
334 fprintf (outf, " +=");
335 break;
336 case dir_positive_or_negative:
337 fprintf (outf, " +-");
338 break;
339 case dir_negative_or_equal:
340 fprintf (outf, " -=");
341 break;
342 case dir_star:
343 fprintf (outf, " *");
344 break;
345 default:
346 fprintf (outf, "indep");
347 break;
348 }
349 }
350 fprintf (outf, "\n");
351 }
352
353 /* Print a vector of direction vectors. */
354
355 static void
356 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
357 int length)
358 {
359 unsigned j;
360 lambda_vector v;
361
362 FOR_EACH_VEC_ELT (dir_vects, j, v)
363 print_direction_vector (outf, v, length);
364 }
365
366 /* Print out a vector VEC of length N to OUTFILE. */
367
368 static inline void
369 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
370 {
371 int i;
372
373 for (i = 0; i < n; i++)
374 fprintf (outfile, "%3d ", vector[i]);
375 fprintf (outfile, "\n");
376 }
377
378 /* Print a vector of distance vectors. */
379
380 static void
381 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
382 int length)
383 {
384 unsigned j;
385 lambda_vector v;
386
387 FOR_EACH_VEC_ELT (dist_vects, j, v)
388 print_lambda_vector (outf, v, length);
389 }
390
391 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
392
393 static void
394 dump_data_dependence_relation (FILE *outf,
395 struct data_dependence_relation *ddr)
396 {
397 struct data_reference *dra, *drb;
398
399 fprintf (outf, "(Data Dep: \n");
400
401 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
402 {
403 if (ddr)
404 {
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 if (dra)
408 dump_data_reference (outf, dra);
409 else
410 fprintf (outf, " (nil)\n");
411 if (drb)
412 dump_data_reference (outf, drb);
413 else
414 fprintf (outf, " (nil)\n");
415 }
416 fprintf (outf, " (don't know)\n)\n");
417 return;
418 }
419
420 dra = DDR_A (ddr);
421 drb = DDR_B (ddr);
422 dump_data_reference (outf, dra);
423 dump_data_reference (outf, drb);
424
425 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
426 fprintf (outf, " (no dependence)\n");
427
428 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
429 {
430 unsigned int i;
431 struct loop *loopi;
432
433 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
434 {
435 fprintf (outf, " access_fn_A: ");
436 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
437 fprintf (outf, " access_fn_B: ");
438 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
439 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
440 }
441
442 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
443 fprintf (outf, " loop nest: (");
444 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
445 fprintf (outf, "%d ", loopi->num);
446 fprintf (outf, ")\n");
447
448 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
449 {
450 fprintf (outf, " distance_vector: ");
451 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
452 DDR_NB_LOOPS (ddr));
453 }
454
455 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
456 {
457 fprintf (outf, " direction_vector: ");
458 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
459 DDR_NB_LOOPS (ddr));
460 }
461 }
462
463 fprintf (outf, ")\n");
464 }
465
466 /* Debug version. */
467
468 DEBUG_FUNCTION void
469 debug_data_dependence_relation (struct data_dependence_relation *ddr)
470 {
471 dump_data_dependence_relation (stderr, ddr);
472 }
473
474 /* Dump into FILE all the dependence relations from DDRS. */
475
476 void
477 dump_data_dependence_relations (FILE *file,
478 vec<ddr_p> ddrs)
479 {
480 unsigned int i;
481 struct data_dependence_relation *ddr;
482
483 FOR_EACH_VEC_ELT (ddrs, i, ddr)
484 dump_data_dependence_relation (file, ddr);
485 }
486
487 DEBUG_FUNCTION void
488 debug (vec<ddr_p> &ref)
489 {
490 dump_data_dependence_relations (stderr, ref);
491 }
492
493 DEBUG_FUNCTION void
494 debug (vec<ddr_p> *ptr)
495 {
496 if (ptr)
497 debug (*ptr);
498 else
499 fprintf (stderr, "<nil>\n");
500 }
501
502
503 /* Dump to STDERR all the dependence relations from DDRS. */
504
505 DEBUG_FUNCTION void
506 debug_data_dependence_relations (vec<ddr_p> ddrs)
507 {
508 dump_data_dependence_relations (stderr, ddrs);
509 }
510
511 /* Dumps the distance and direction vectors in FILE. DDRS contains
512 the dependence relations, and VECT_SIZE is the size of the
513 dependence vectors, or in other words the number of loops in the
514 considered nest. */
515
516 static void
517 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
518 {
519 unsigned int i, j;
520 struct data_dependence_relation *ddr;
521 lambda_vector v;
522
523 FOR_EACH_VEC_ELT (ddrs, i, ddr)
524 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
525 {
526 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
527 {
528 fprintf (file, "DISTANCE_V (");
529 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
530 fprintf (file, ")\n");
531 }
532
533 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
534 {
535 fprintf (file, "DIRECTION_V (");
536 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
537 fprintf (file, ")\n");
538 }
539 }
540
541 fprintf (file, "\n\n");
542 }
543
544 /* Dumps the data dependence relations DDRS in FILE. */
545
546 static void
547 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
548 {
549 unsigned int i;
550 struct data_dependence_relation *ddr;
551
552 FOR_EACH_VEC_ELT (ddrs, i, ddr)
553 dump_data_dependence_relation (file, ddr);
554
555 fprintf (file, "\n\n");
556 }
557
558 DEBUG_FUNCTION void
559 debug_ddrs (vec<ddr_p> ddrs)
560 {
561 dump_ddrs (stderr, ddrs);
562 }
563
564 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
565 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
566 constant of type ssizetype, and returns true. If we cannot do this
567 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
568 is returned. */
569
570 static bool
571 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
572 tree *var, tree *off)
573 {
574 tree var0, var1;
575 tree off0, off1;
576 enum tree_code ocode = code;
577
578 *var = NULL_TREE;
579 *off = NULL_TREE;
580
581 switch (code)
582 {
583 case INTEGER_CST:
584 *var = build_int_cst (type, 0);
585 *off = fold_convert (ssizetype, op0);
586 return true;
587
588 case POINTER_PLUS_EXPR:
589 ocode = PLUS_EXPR;
590 /* FALLTHROUGH */
591 case PLUS_EXPR:
592 case MINUS_EXPR:
593 split_constant_offset (op0, &var0, &off0);
594 split_constant_offset (op1, &var1, &off1);
595 *var = fold_build2 (code, type, var0, var1);
596 *off = size_binop (ocode, off0, off1);
597 return true;
598
599 case MULT_EXPR:
600 if (TREE_CODE (op1) != INTEGER_CST)
601 return false;
602
603 split_constant_offset (op0, &var0, &off0);
604 *var = fold_build2 (MULT_EXPR, type, var0, op1);
605 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
606 return true;
607
608 case ADDR_EXPR:
609 {
610 tree base, poffset;
611 HOST_WIDE_INT pbitsize, pbitpos;
612 enum machine_mode pmode;
613 int punsignedp, pvolatilep;
614
615 op0 = TREE_OPERAND (op0, 0);
616 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
617 &pmode, &punsignedp, &pvolatilep, false);
618
619 if (pbitpos % BITS_PER_UNIT != 0)
620 return false;
621 base = build_fold_addr_expr (base);
622 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
623
624 if (poffset)
625 {
626 split_constant_offset (poffset, &poffset, &off1);
627 off0 = size_binop (PLUS_EXPR, off0, off1);
628 if (POINTER_TYPE_P (TREE_TYPE (base)))
629 base = fold_build_pointer_plus (base, poffset);
630 else
631 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
632 fold_convert (TREE_TYPE (base), poffset));
633 }
634
635 var0 = fold_convert (type, base);
636
637 /* If variable length types are involved, punt, otherwise casts
638 might be converted into ARRAY_REFs in gimplify_conversion.
639 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
640 possibly no longer appears in current GIMPLE, might resurface.
641 This perhaps could run
642 if (CONVERT_EXPR_P (var0))
643 {
644 gimplify_conversion (&var0);
645 // Attempt to fill in any within var0 found ARRAY_REF's
646 // element size from corresponding op embedded ARRAY_REF,
647 // if unsuccessful, just punt.
648 } */
649 while (POINTER_TYPE_P (type))
650 type = TREE_TYPE (type);
651 if (int_size_in_bytes (type) < 0)
652 return false;
653
654 *var = var0;
655 *off = off0;
656 return true;
657 }
658
659 case SSA_NAME:
660 {
661 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
662 enum tree_code subcode;
663
664 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
665 return false;
666
667 var0 = gimple_assign_rhs1 (def_stmt);
668 subcode = gimple_assign_rhs_code (def_stmt);
669 var1 = gimple_assign_rhs2 (def_stmt);
670
671 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
672 }
673 CASE_CONVERT:
674 {
675 /* We must not introduce undefined overflow, and we must not change the value.
676 Hence we're okay if the inner type doesn't overflow to start with
677 (pointer or signed), the outer type also is an integer or pointer
678 and the outer precision is at least as large as the inner. */
679 tree itype = TREE_TYPE (op0);
680 if ((POINTER_TYPE_P (itype)
681 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
682 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
683 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
684 {
685 split_constant_offset (op0, &var0, off);
686 *var = fold_convert (type, var0);
687 return true;
688 }
689 return false;
690 }
691
692 default:
693 return false;
694 }
695 }
696
697 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
698 will be ssizetype. */
699
700 void
701 split_constant_offset (tree exp, tree *var, tree *off)
702 {
703 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
704 enum tree_code code;
705
706 *var = exp;
707 *off = ssize_int (0);
708 STRIP_NOPS (exp);
709
710 if (tree_is_chrec (exp)
711 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
712 return;
713
714 otype = TREE_TYPE (exp);
715 code = TREE_CODE (exp);
716 extract_ops_from_tree (exp, &code, &op0, &op1);
717 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
718 {
719 *var = fold_convert (type, e);
720 *off = o;
721 }
722 }
723
724 /* Returns the address ADDR of an object in a canonical shape (without nop
725 casts, and with type of pointer to the object). */
726
727 static tree
728 canonicalize_base_object_address (tree addr)
729 {
730 tree orig = addr;
731
732 STRIP_NOPS (addr);
733
734 /* The base address may be obtained by casting from integer, in that case
735 keep the cast. */
736 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
737 return orig;
738
739 if (TREE_CODE (addr) != ADDR_EXPR)
740 return addr;
741
742 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
743 }
744
745 /* Analyzes the behavior of the memory reference DR in the innermost loop or
746 basic block that contains it. Returns true if analysis succeed or false
747 otherwise. */
748
749 bool
750 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
751 {
752 gimple stmt = DR_STMT (dr);
753 struct loop *loop = loop_containing_stmt (stmt);
754 tree ref = DR_REF (dr);
755 HOST_WIDE_INT pbitsize, pbitpos;
756 tree base, poffset;
757 enum machine_mode pmode;
758 int punsignedp, pvolatilep;
759 affine_iv base_iv, offset_iv;
760 tree init, dinit, step;
761 bool in_loop = (loop && loop->num);
762
763 if (dump_file && (dump_flags & TDF_DETAILS))
764 fprintf (dump_file, "analyze_innermost: ");
765
766 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
767 &pmode, &punsignedp, &pvolatilep, false);
768 gcc_assert (base != NULL_TREE);
769
770 if (pbitpos % BITS_PER_UNIT != 0)
771 {
772 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "failed: bit offset alignment.\n");
774 return false;
775 }
776
777 if (TREE_CODE (base) == MEM_REF)
778 {
779 if (!integer_zerop (TREE_OPERAND (base, 1)))
780 {
781 offset_int moff = mem_ref_offset (base);
782 tree mofft = wide_int_to_tree (sizetype, moff);
783 if (!poffset)
784 poffset = mofft;
785 else
786 poffset = size_binop (PLUS_EXPR, poffset, mofft);
787 }
788 base = TREE_OPERAND (base, 0);
789 }
790 else
791 base = build_fold_addr_expr (base);
792
793 if (in_loop)
794 {
795 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
796 nest ? true : false))
797 {
798 if (nest)
799 {
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "failed: evolution of base is not"
802 " affine.\n");
803 return false;
804 }
805 else
806 {
807 base_iv.base = base;
808 base_iv.step = ssize_int (0);
809 base_iv.no_overflow = true;
810 }
811 }
812 }
813 else
814 {
815 base_iv.base = base;
816 base_iv.step = ssize_int (0);
817 base_iv.no_overflow = true;
818 }
819
820 if (!poffset)
821 {
822 offset_iv.base = ssize_int (0);
823 offset_iv.step = ssize_int (0);
824 }
825 else
826 {
827 if (!in_loop)
828 {
829 offset_iv.base = poffset;
830 offset_iv.step = ssize_int (0);
831 }
832 else if (!simple_iv (loop, loop_containing_stmt (stmt),
833 poffset, &offset_iv,
834 nest ? true : false))
835 {
836 if (nest)
837 {
838 if (dump_file && (dump_flags & TDF_DETAILS))
839 fprintf (dump_file, "failed: evolution of offset is not"
840 " affine.\n");
841 return false;
842 }
843 else
844 {
845 offset_iv.base = poffset;
846 offset_iv.step = ssize_int (0);
847 }
848 }
849 }
850
851 init = ssize_int (pbitpos / BITS_PER_UNIT);
852 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
853 init = size_binop (PLUS_EXPR, init, dinit);
854 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
855 init = size_binop (PLUS_EXPR, init, dinit);
856
857 step = size_binop (PLUS_EXPR,
858 fold_convert (ssizetype, base_iv.step),
859 fold_convert (ssizetype, offset_iv.step));
860
861 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
862
863 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
864 DR_INIT (dr) = init;
865 DR_STEP (dr) = step;
866
867 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
868
869 if (dump_file && (dump_flags & TDF_DETAILS))
870 fprintf (dump_file, "success.\n");
871
872 return true;
873 }
874
875 /* Determines the base object and the list of indices of memory reference
876 DR, analyzed in LOOP and instantiated in loop nest NEST. */
877
878 static void
879 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
880 {
881 vec<tree> access_fns = vNULL;
882 tree ref, op;
883 tree base, off, access_fn;
884 basic_block before_loop;
885
886 /* If analyzing a basic-block there are no indices to analyze
887 and thus no access functions. */
888 if (!nest)
889 {
890 DR_BASE_OBJECT (dr) = DR_REF (dr);
891 DR_ACCESS_FNS (dr).create (0);
892 return;
893 }
894
895 ref = DR_REF (dr);
896 before_loop = block_before_loop (nest);
897
898 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
899 into a two element array with a constant index. The base is
900 then just the immediate underlying object. */
901 if (TREE_CODE (ref) == REALPART_EXPR)
902 {
903 ref = TREE_OPERAND (ref, 0);
904 access_fns.safe_push (integer_zero_node);
905 }
906 else if (TREE_CODE (ref) == IMAGPART_EXPR)
907 {
908 ref = TREE_OPERAND (ref, 0);
909 access_fns.safe_push (integer_one_node);
910 }
911
912 /* Analyze access functions of dimensions we know to be independent. */
913 while (handled_component_p (ref))
914 {
915 if (TREE_CODE (ref) == ARRAY_REF)
916 {
917 op = TREE_OPERAND (ref, 1);
918 access_fn = analyze_scalar_evolution (loop, op);
919 access_fn = instantiate_scev (before_loop, loop, access_fn);
920 access_fns.safe_push (access_fn);
921 }
922 else if (TREE_CODE (ref) == COMPONENT_REF
923 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
924 {
925 /* For COMPONENT_REFs of records (but not unions!) use the
926 FIELD_DECL offset as constant access function so we can
927 disambiguate a[i].f1 and a[i].f2. */
928 tree off = component_ref_field_offset (ref);
929 off = size_binop (PLUS_EXPR,
930 size_binop (MULT_EXPR,
931 fold_convert (bitsizetype, off),
932 bitsize_int (BITS_PER_UNIT)),
933 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
934 access_fns.safe_push (off);
935 }
936 else
937 /* If we have an unhandled component we could not translate
938 to an access function stop analyzing. We have determined
939 our base object in this case. */
940 break;
941
942 ref = TREE_OPERAND (ref, 0);
943 }
944
945 /* If the address operand of a MEM_REF base has an evolution in the
946 analyzed nest, add it as an additional independent access-function. */
947 if (TREE_CODE (ref) == MEM_REF)
948 {
949 op = TREE_OPERAND (ref, 0);
950 access_fn = analyze_scalar_evolution (loop, op);
951 access_fn = instantiate_scev (before_loop, loop, access_fn);
952 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
953 {
954 tree orig_type;
955 tree memoff = TREE_OPERAND (ref, 1);
956 base = initial_condition (access_fn);
957 orig_type = TREE_TYPE (base);
958 STRIP_USELESS_TYPE_CONVERSION (base);
959 split_constant_offset (base, &base, &off);
960 /* Fold the MEM_REF offset into the evolutions initial
961 value to make more bases comparable. */
962 if (!integer_zerop (memoff))
963 {
964 off = size_binop (PLUS_EXPR, off,
965 fold_convert (ssizetype, memoff));
966 memoff = build_int_cst (TREE_TYPE (memoff), 0);
967 }
968 access_fn = chrec_replace_initial_condition
969 (access_fn, fold_convert (orig_type, off));
970 /* ??? This is still not a suitable base object for
971 dr_may_alias_p - the base object needs to be an
972 access that covers the object as whole. With
973 an evolution in the pointer this cannot be
974 guaranteed.
975 As a band-aid, mark the access so we can special-case
976 it in dr_may_alias_p. */
977 ref = fold_build2_loc (EXPR_LOCATION (ref),
978 MEM_REF, TREE_TYPE (ref),
979 base, memoff);
980 DR_UNCONSTRAINED_BASE (dr) = true;
981 access_fns.safe_push (access_fn);
982 }
983 }
984 else if (DECL_P (ref))
985 {
986 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
987 ref = build2 (MEM_REF, TREE_TYPE (ref),
988 build_fold_addr_expr (ref),
989 build_int_cst (reference_alias_ptr_type (ref), 0));
990 }
991
992 DR_BASE_OBJECT (dr) = ref;
993 DR_ACCESS_FNS (dr) = access_fns;
994 }
995
996 /* Extracts the alias analysis information from the memory reference DR. */
997
998 static void
999 dr_analyze_alias (struct data_reference *dr)
1000 {
1001 tree ref = DR_REF (dr);
1002 tree base = get_base_address (ref), addr;
1003
1004 if (INDIRECT_REF_P (base)
1005 || TREE_CODE (base) == MEM_REF)
1006 {
1007 addr = TREE_OPERAND (base, 0);
1008 if (TREE_CODE (addr) == SSA_NAME)
1009 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1010 }
1011 }
1012
1013 /* Frees data reference DR. */
1014
1015 void
1016 free_data_ref (data_reference_p dr)
1017 {
1018 DR_ACCESS_FNS (dr).release ();
1019 free (dr);
1020 }
1021
1022 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1023 is read if IS_READ is true, write otherwise. Returns the
1024 data_reference description of MEMREF. NEST is the outermost loop
1025 in which the reference should be instantiated, LOOP is the loop in
1026 which the data reference should be analyzed. */
1027
1028 struct data_reference *
1029 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1030 bool is_read)
1031 {
1032 struct data_reference *dr;
1033
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1035 {
1036 fprintf (dump_file, "Creating dr for ");
1037 print_generic_expr (dump_file, memref, TDF_SLIM);
1038 fprintf (dump_file, "\n");
1039 }
1040
1041 dr = XCNEW (struct data_reference);
1042 DR_STMT (dr) = stmt;
1043 DR_REF (dr) = memref;
1044 DR_IS_READ (dr) = is_read;
1045
1046 dr_analyze_innermost (dr, nest);
1047 dr_analyze_indices (dr, nest, loop);
1048 dr_analyze_alias (dr);
1049
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 {
1052 unsigned i;
1053 fprintf (dump_file, "\tbase_address: ");
1054 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1055 fprintf (dump_file, "\n\toffset from base address: ");
1056 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1057 fprintf (dump_file, "\n\tconstant offset from base address: ");
1058 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1059 fprintf (dump_file, "\n\tstep: ");
1060 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1061 fprintf (dump_file, "\n\taligned to: ");
1062 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1063 fprintf (dump_file, "\n\tbase_object: ");
1064 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1065 fprintf (dump_file, "\n");
1066 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1067 {
1068 fprintf (dump_file, "\tAccess function %d: ", i);
1069 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1070 }
1071 }
1072
1073 return dr;
1074 }
1075
1076 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1077 expressions. */
1078 static bool
1079 dr_equal_offsets_p1 (tree offset1, tree offset2)
1080 {
1081 bool res;
1082
1083 STRIP_NOPS (offset1);
1084 STRIP_NOPS (offset2);
1085
1086 if (offset1 == offset2)
1087 return true;
1088
1089 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1090 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1091 return false;
1092
1093 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1094 TREE_OPERAND (offset2, 0));
1095
1096 if (!res || !BINARY_CLASS_P (offset1))
1097 return res;
1098
1099 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1100 TREE_OPERAND (offset2, 1));
1101
1102 return res;
1103 }
1104
1105 /* Check if DRA and DRB have equal offsets. */
1106 bool
1107 dr_equal_offsets_p (struct data_reference *dra,
1108 struct data_reference *drb)
1109 {
1110 tree offset1, offset2;
1111
1112 offset1 = DR_OFFSET (dra);
1113 offset2 = DR_OFFSET (drb);
1114
1115 return dr_equal_offsets_p1 (offset1, offset2);
1116 }
1117
1118 /* Returns true if FNA == FNB. */
1119
1120 static bool
1121 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1122 {
1123 unsigned i, n = fna.length ();
1124
1125 if (n != fnb.length ())
1126 return false;
1127
1128 for (i = 0; i < n; i++)
1129 if (!operand_equal_p (fna[i], fnb[i], 0))
1130 return false;
1131
1132 return true;
1133 }
1134
1135 /* If all the functions in CF are the same, returns one of them,
1136 otherwise returns NULL. */
1137
1138 static affine_fn
1139 common_affine_function (conflict_function *cf)
1140 {
1141 unsigned i;
1142 affine_fn comm;
1143
1144 if (!CF_NONTRIVIAL_P (cf))
1145 return affine_fn ();
1146
1147 comm = cf->fns[0];
1148
1149 for (i = 1; i < cf->n; i++)
1150 if (!affine_function_equal_p (comm, cf->fns[i]))
1151 return affine_fn ();
1152
1153 return comm;
1154 }
1155
1156 /* Returns the base of the affine function FN. */
1157
1158 static tree
1159 affine_function_base (affine_fn fn)
1160 {
1161 return fn[0];
1162 }
1163
1164 /* Returns true if FN is a constant. */
1165
1166 static bool
1167 affine_function_constant_p (affine_fn fn)
1168 {
1169 unsigned i;
1170 tree coef;
1171
1172 for (i = 1; fn.iterate (i, &coef); i++)
1173 if (!integer_zerop (coef))
1174 return false;
1175
1176 return true;
1177 }
1178
1179 /* Returns true if FN is the zero constant function. */
1180
1181 static bool
1182 affine_function_zero_p (affine_fn fn)
1183 {
1184 return (integer_zerop (affine_function_base (fn))
1185 && affine_function_constant_p (fn));
1186 }
1187
1188 /* Returns a signed integer type with the largest precision from TA
1189 and TB. */
1190
1191 static tree
1192 signed_type_for_types (tree ta, tree tb)
1193 {
1194 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1195 return signed_type_for (ta);
1196 else
1197 return signed_type_for (tb);
1198 }
1199
1200 /* Applies operation OP on affine functions FNA and FNB, and returns the
1201 result. */
1202
1203 static affine_fn
1204 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1205 {
1206 unsigned i, n, m;
1207 affine_fn ret;
1208 tree coef;
1209
1210 if (fnb.length () > fna.length ())
1211 {
1212 n = fna.length ();
1213 m = fnb.length ();
1214 }
1215 else
1216 {
1217 n = fnb.length ();
1218 m = fna.length ();
1219 }
1220
1221 ret.create (m);
1222 for (i = 0; i < n; i++)
1223 {
1224 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1225 TREE_TYPE (fnb[i]));
1226 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1227 }
1228
1229 for (; fna.iterate (i, &coef); i++)
1230 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1231 coef, integer_zero_node));
1232 for (; fnb.iterate (i, &coef); i++)
1233 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1234 integer_zero_node, coef));
1235
1236 return ret;
1237 }
1238
1239 /* Returns the sum of affine functions FNA and FNB. */
1240
1241 static affine_fn
1242 affine_fn_plus (affine_fn fna, affine_fn fnb)
1243 {
1244 return affine_fn_op (PLUS_EXPR, fna, fnb);
1245 }
1246
1247 /* Returns the difference of affine functions FNA and FNB. */
1248
1249 static affine_fn
1250 affine_fn_minus (affine_fn fna, affine_fn fnb)
1251 {
1252 return affine_fn_op (MINUS_EXPR, fna, fnb);
1253 }
1254
1255 /* Frees affine function FN. */
1256
1257 static void
1258 affine_fn_free (affine_fn fn)
1259 {
1260 fn.release ();
1261 }
1262
1263 /* Determine for each subscript in the data dependence relation DDR
1264 the distance. */
1265
1266 static void
1267 compute_subscript_distance (struct data_dependence_relation *ddr)
1268 {
1269 conflict_function *cf_a, *cf_b;
1270 affine_fn fn_a, fn_b, diff;
1271
1272 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1273 {
1274 unsigned int i;
1275
1276 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1277 {
1278 struct subscript *subscript;
1279
1280 subscript = DDR_SUBSCRIPT (ddr, i);
1281 cf_a = SUB_CONFLICTS_IN_A (subscript);
1282 cf_b = SUB_CONFLICTS_IN_B (subscript);
1283
1284 fn_a = common_affine_function (cf_a);
1285 fn_b = common_affine_function (cf_b);
1286 if (!fn_a.exists () || !fn_b.exists ())
1287 {
1288 SUB_DISTANCE (subscript) = chrec_dont_know;
1289 return;
1290 }
1291 diff = affine_fn_minus (fn_a, fn_b);
1292
1293 if (affine_function_constant_p (diff))
1294 SUB_DISTANCE (subscript) = affine_function_base (diff);
1295 else
1296 SUB_DISTANCE (subscript) = chrec_dont_know;
1297
1298 affine_fn_free (diff);
1299 }
1300 }
1301 }
1302
1303 /* Returns the conflict function for "unknown". */
1304
1305 static conflict_function *
1306 conflict_fn_not_known (void)
1307 {
1308 conflict_function *fn = XCNEW (conflict_function);
1309 fn->n = NOT_KNOWN;
1310
1311 return fn;
1312 }
1313
1314 /* Returns the conflict function for "independent". */
1315
1316 static conflict_function *
1317 conflict_fn_no_dependence (void)
1318 {
1319 conflict_function *fn = XCNEW (conflict_function);
1320 fn->n = NO_DEPENDENCE;
1321
1322 return fn;
1323 }
1324
1325 /* Returns true if the address of OBJ is invariant in LOOP. */
1326
1327 static bool
1328 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1329 {
1330 while (handled_component_p (obj))
1331 {
1332 if (TREE_CODE (obj) == ARRAY_REF)
1333 {
1334 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1335 need to check the stride and the lower bound of the reference. */
1336 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1337 loop->num)
1338 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1339 loop->num))
1340 return false;
1341 }
1342 else if (TREE_CODE (obj) == COMPONENT_REF)
1343 {
1344 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1345 loop->num))
1346 return false;
1347 }
1348 obj = TREE_OPERAND (obj, 0);
1349 }
1350
1351 if (!INDIRECT_REF_P (obj)
1352 && TREE_CODE (obj) != MEM_REF)
1353 return true;
1354
1355 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1356 loop->num);
1357 }
1358
1359 /* Returns false if we can prove that data references A and B do not alias,
1360 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1361 considered. */
1362
1363 bool
1364 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1365 bool loop_nest)
1366 {
1367 tree addr_a = DR_BASE_OBJECT (a);
1368 tree addr_b = DR_BASE_OBJECT (b);
1369
1370 /* If we are not processing a loop nest but scalar code we
1371 do not need to care about possible cross-iteration dependences
1372 and thus can process the full original reference. Do so,
1373 similar to how loop invariant motion applies extra offset-based
1374 disambiguation. */
1375 if (!loop_nest)
1376 {
1377 aff_tree off1, off2;
1378 widest_int size1, size2;
1379 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1380 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1381 aff_combination_scale (&off1, -1);
1382 aff_combination_add (&off2, &off1);
1383 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1384 return false;
1385 }
1386
1387 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1388 the size of the base-object. So we cannot do any offset/overlap
1389 based analysis but have to rely on points-to information only. */
1390 if (TREE_CODE (addr_a) == MEM_REF
1391 && DR_UNCONSTRAINED_BASE (a))
1392 {
1393 if (TREE_CODE (addr_b) == MEM_REF
1394 && DR_UNCONSTRAINED_BASE (b))
1395 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1396 TREE_OPERAND (addr_b, 0));
1397 else
1398 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1399 build_fold_addr_expr (addr_b));
1400 }
1401 else if (TREE_CODE (addr_b) == MEM_REF
1402 && DR_UNCONSTRAINED_BASE (b))
1403 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1404 TREE_OPERAND (addr_b, 0));
1405
1406 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1407 that is being subsetted in the loop nest. */
1408 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1409 return refs_output_dependent_p (addr_a, addr_b);
1410 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1411 return refs_anti_dependent_p (addr_a, addr_b);
1412 return refs_may_alias_p (addr_a, addr_b);
1413 }
1414
1415 /* Initialize a data dependence relation between data accesses A and
1416 B. NB_LOOPS is the number of loops surrounding the references: the
1417 size of the classic distance/direction vectors. */
1418
1419 struct data_dependence_relation *
1420 initialize_data_dependence_relation (struct data_reference *a,
1421 struct data_reference *b,
1422 vec<loop_p> loop_nest)
1423 {
1424 struct data_dependence_relation *res;
1425 unsigned int i;
1426
1427 res = XNEW (struct data_dependence_relation);
1428 DDR_A (res) = a;
1429 DDR_B (res) = b;
1430 DDR_LOOP_NEST (res).create (0);
1431 DDR_REVERSED_P (res) = false;
1432 DDR_SUBSCRIPTS (res).create (0);
1433 DDR_DIR_VECTS (res).create (0);
1434 DDR_DIST_VECTS (res).create (0);
1435
1436 if (a == NULL || b == NULL)
1437 {
1438 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1439 return res;
1440 }
1441
1442 /* If the data references do not alias, then they are independent. */
1443 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1444 {
1445 DDR_ARE_DEPENDENT (res) = chrec_known;
1446 return res;
1447 }
1448
1449 /* The case where the references are exactly the same. */
1450 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1451 {
1452 if (loop_nest.exists ()
1453 && !object_address_invariant_in_loop_p (loop_nest[0],
1454 DR_BASE_OBJECT (a)))
1455 {
1456 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1457 return res;
1458 }
1459 DDR_AFFINE_P (res) = true;
1460 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1461 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1462 DDR_LOOP_NEST (res) = loop_nest;
1463 DDR_INNER_LOOP (res) = 0;
1464 DDR_SELF_REFERENCE (res) = true;
1465 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1466 {
1467 struct subscript *subscript;
1468
1469 subscript = XNEW (struct subscript);
1470 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1471 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1472 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1473 SUB_DISTANCE (subscript) = chrec_dont_know;
1474 DDR_SUBSCRIPTS (res).safe_push (subscript);
1475 }
1476 return res;
1477 }
1478
1479 /* If the references do not access the same object, we do not know
1480 whether they alias or not. */
1481 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1482 {
1483 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1484 return res;
1485 }
1486
1487 /* If the base of the object is not invariant in the loop nest, we cannot
1488 analyze it. TODO -- in fact, it would suffice to record that there may
1489 be arbitrary dependences in the loops where the base object varies. */
1490 if (loop_nest.exists ()
1491 && !object_address_invariant_in_loop_p (loop_nest[0],
1492 DR_BASE_OBJECT (a)))
1493 {
1494 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1495 return res;
1496 }
1497
1498 /* If the number of dimensions of the access to not agree we can have
1499 a pointer access to a component of the array element type and an
1500 array access while the base-objects are still the same. Punt. */
1501 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1502 {
1503 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1504 return res;
1505 }
1506
1507 DDR_AFFINE_P (res) = true;
1508 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1509 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1510 DDR_LOOP_NEST (res) = loop_nest;
1511 DDR_INNER_LOOP (res) = 0;
1512 DDR_SELF_REFERENCE (res) = false;
1513
1514 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1515 {
1516 struct subscript *subscript;
1517
1518 subscript = XNEW (struct subscript);
1519 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1520 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1521 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1522 SUB_DISTANCE (subscript) = chrec_dont_know;
1523 DDR_SUBSCRIPTS (res).safe_push (subscript);
1524 }
1525
1526 return res;
1527 }
1528
1529 /* Frees memory used by the conflict function F. */
1530
1531 static void
1532 free_conflict_function (conflict_function *f)
1533 {
1534 unsigned i;
1535
1536 if (CF_NONTRIVIAL_P (f))
1537 {
1538 for (i = 0; i < f->n; i++)
1539 affine_fn_free (f->fns[i]);
1540 }
1541 free (f);
1542 }
1543
1544 /* Frees memory used by SUBSCRIPTS. */
1545
1546 static void
1547 free_subscripts (vec<subscript_p> subscripts)
1548 {
1549 unsigned i;
1550 subscript_p s;
1551
1552 FOR_EACH_VEC_ELT (subscripts, i, s)
1553 {
1554 free_conflict_function (s->conflicting_iterations_in_a);
1555 free_conflict_function (s->conflicting_iterations_in_b);
1556 free (s);
1557 }
1558 subscripts.release ();
1559 }
1560
1561 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1562 description. */
1563
1564 static inline void
1565 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1566 tree chrec)
1567 {
1568 DDR_ARE_DEPENDENT (ddr) = chrec;
1569 free_subscripts (DDR_SUBSCRIPTS (ddr));
1570 DDR_SUBSCRIPTS (ddr).create (0);
1571 }
1572
1573 /* The dependence relation DDR cannot be represented by a distance
1574 vector. */
1575
1576 static inline void
1577 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1578 {
1579 if (dump_file && (dump_flags & TDF_DETAILS))
1580 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1581
1582 DDR_AFFINE_P (ddr) = false;
1583 }
1584
1585 \f
1586
1587 /* This section contains the classic Banerjee tests. */
1588
1589 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1590 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1591
1592 static inline bool
1593 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1594 {
1595 return (evolution_function_is_constant_p (chrec_a)
1596 && evolution_function_is_constant_p (chrec_b));
1597 }
1598
1599 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1600 variable, i.e., if the SIV (Single Index Variable) test is true. */
1601
1602 static bool
1603 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1604 {
1605 if ((evolution_function_is_constant_p (chrec_a)
1606 && evolution_function_is_univariate_p (chrec_b))
1607 || (evolution_function_is_constant_p (chrec_b)
1608 && evolution_function_is_univariate_p (chrec_a)))
1609 return true;
1610
1611 if (evolution_function_is_univariate_p (chrec_a)
1612 && evolution_function_is_univariate_p (chrec_b))
1613 {
1614 switch (TREE_CODE (chrec_a))
1615 {
1616 case POLYNOMIAL_CHREC:
1617 switch (TREE_CODE (chrec_b))
1618 {
1619 case POLYNOMIAL_CHREC:
1620 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1621 return false;
1622
1623 default:
1624 return true;
1625 }
1626
1627 default:
1628 return true;
1629 }
1630 }
1631
1632 return false;
1633 }
1634
1635 /* Creates a conflict function with N dimensions. The affine functions
1636 in each dimension follow. */
1637
1638 static conflict_function *
1639 conflict_fn (unsigned n, ...)
1640 {
1641 unsigned i;
1642 conflict_function *ret = XCNEW (conflict_function);
1643 va_list ap;
1644
1645 gcc_assert (0 < n && n <= MAX_DIM);
1646 va_start (ap, n);
1647
1648 ret->n = n;
1649 for (i = 0; i < n; i++)
1650 ret->fns[i] = va_arg (ap, affine_fn);
1651 va_end (ap);
1652
1653 return ret;
1654 }
1655
1656 /* Returns constant affine function with value CST. */
1657
1658 static affine_fn
1659 affine_fn_cst (tree cst)
1660 {
1661 affine_fn fn;
1662 fn.create (1);
1663 fn.quick_push (cst);
1664 return fn;
1665 }
1666
1667 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1668
1669 static affine_fn
1670 affine_fn_univar (tree cst, unsigned dim, tree coef)
1671 {
1672 affine_fn fn;
1673 fn.create (dim + 1);
1674 unsigned i;
1675
1676 gcc_assert (dim > 0);
1677 fn.quick_push (cst);
1678 for (i = 1; i < dim; i++)
1679 fn.quick_push (integer_zero_node);
1680 fn.quick_push (coef);
1681 return fn;
1682 }
1683
1684 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1685 *OVERLAPS_B are initialized to the functions that describe the
1686 relation between the elements accessed twice by CHREC_A and
1687 CHREC_B. For k >= 0, the following property is verified:
1688
1689 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1690
1691 static void
1692 analyze_ziv_subscript (tree chrec_a,
1693 tree chrec_b,
1694 conflict_function **overlaps_a,
1695 conflict_function **overlaps_b,
1696 tree *last_conflicts)
1697 {
1698 tree type, difference;
1699 dependence_stats.num_ziv++;
1700
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "(analyze_ziv_subscript \n");
1703
1704 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1705 chrec_a = chrec_convert (type, chrec_a, NULL);
1706 chrec_b = chrec_convert (type, chrec_b, NULL);
1707 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1708
1709 switch (TREE_CODE (difference))
1710 {
1711 case INTEGER_CST:
1712 if (integer_zerop (difference))
1713 {
1714 /* The difference is equal to zero: the accessed index
1715 overlaps for each iteration in the loop. */
1716 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1717 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1718 *last_conflicts = chrec_dont_know;
1719 dependence_stats.num_ziv_dependent++;
1720 }
1721 else
1722 {
1723 /* The accesses do not overlap. */
1724 *overlaps_a = conflict_fn_no_dependence ();
1725 *overlaps_b = conflict_fn_no_dependence ();
1726 *last_conflicts = integer_zero_node;
1727 dependence_stats.num_ziv_independent++;
1728 }
1729 break;
1730
1731 default:
1732 /* We're not sure whether the indexes overlap. For the moment,
1733 conservatively answer "don't know". */
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1735 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1736
1737 *overlaps_a = conflict_fn_not_known ();
1738 *overlaps_b = conflict_fn_not_known ();
1739 *last_conflicts = chrec_dont_know;
1740 dependence_stats.num_ziv_unimplemented++;
1741 break;
1742 }
1743
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, ")\n");
1746 }
1747
1748 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1749 and only if it fits to the int type. If this is not the case, or the
1750 bound on the number of iterations of LOOP could not be derived, returns
1751 chrec_dont_know. */
1752
1753 static tree
1754 max_stmt_executions_tree (struct loop *loop)
1755 {
1756 widest_int nit;
1757
1758 if (!max_stmt_executions (loop, &nit))
1759 return chrec_dont_know;
1760
1761 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1762 return chrec_dont_know;
1763
1764 return wide_int_to_tree (unsigned_type_node, nit);
1765 }
1766
1767 /* Determine whether the CHREC is always positive/negative. If the expression
1768 cannot be statically analyzed, return false, otherwise set the answer into
1769 VALUE. */
1770
1771 static bool
1772 chrec_is_positive (tree chrec, bool *value)
1773 {
1774 bool value0, value1, value2;
1775 tree end_value, nb_iter;
1776
1777 switch (TREE_CODE (chrec))
1778 {
1779 case POLYNOMIAL_CHREC:
1780 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1781 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1782 return false;
1783
1784 /* FIXME -- overflows. */
1785 if (value0 == value1)
1786 {
1787 *value = value0;
1788 return true;
1789 }
1790
1791 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1792 and the proof consists in showing that the sign never
1793 changes during the execution of the loop, from 0 to
1794 loop->nb_iterations. */
1795 if (!evolution_function_is_affine_p (chrec))
1796 return false;
1797
1798 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1799 if (chrec_contains_undetermined (nb_iter))
1800 return false;
1801
1802 #if 0
1803 /* TODO -- If the test is after the exit, we may decrease the number of
1804 iterations by one. */
1805 if (after_exit)
1806 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1807 #endif
1808
1809 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1810
1811 if (!chrec_is_positive (end_value, &value2))
1812 return false;
1813
1814 *value = value0;
1815 return value0 == value1;
1816
1817 case INTEGER_CST:
1818 switch (tree_int_cst_sgn (chrec))
1819 {
1820 case -1:
1821 *value = false;
1822 break;
1823 case 1:
1824 *value = true;
1825 break;
1826 default:
1827 return false;
1828 }
1829 return true;
1830
1831 default:
1832 return false;
1833 }
1834 }
1835
1836
1837 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1838 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1839 *OVERLAPS_B are initialized to the functions that describe the
1840 relation between the elements accessed twice by CHREC_A and
1841 CHREC_B. For k >= 0, the following property is verified:
1842
1843 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1844
1845 static void
1846 analyze_siv_subscript_cst_affine (tree chrec_a,
1847 tree chrec_b,
1848 conflict_function **overlaps_a,
1849 conflict_function **overlaps_b,
1850 tree *last_conflicts)
1851 {
1852 bool value0, value1, value2;
1853 tree type, difference, tmp;
1854
1855 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1856 chrec_a = chrec_convert (type, chrec_a, NULL);
1857 chrec_b = chrec_convert (type, chrec_b, NULL);
1858 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1859
1860 /* Special case overlap in the first iteration. */
1861 if (integer_zerop (difference))
1862 {
1863 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1864 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1865 *last_conflicts = integer_one_node;
1866 return;
1867 }
1868
1869 if (!chrec_is_positive (initial_condition (difference), &value0))
1870 {
1871 if (dump_file && (dump_flags & TDF_DETAILS))
1872 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1873
1874 dependence_stats.num_siv_unimplemented++;
1875 *overlaps_a = conflict_fn_not_known ();
1876 *overlaps_b = conflict_fn_not_known ();
1877 *last_conflicts = chrec_dont_know;
1878 return;
1879 }
1880 else
1881 {
1882 if (value0 == false)
1883 {
1884 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1885 {
1886 if (dump_file && (dump_flags & TDF_DETAILS))
1887 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1888
1889 *overlaps_a = conflict_fn_not_known ();
1890 *overlaps_b = conflict_fn_not_known ();
1891 *last_conflicts = chrec_dont_know;
1892 dependence_stats.num_siv_unimplemented++;
1893 return;
1894 }
1895 else
1896 {
1897 if (value1 == true)
1898 {
1899 /* Example:
1900 chrec_a = 12
1901 chrec_b = {10, +, 1}
1902 */
1903
1904 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1905 {
1906 HOST_WIDE_INT numiter;
1907 struct loop *loop = get_chrec_loop (chrec_b);
1908
1909 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1910 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1911 fold_build1 (ABS_EXPR, type, difference),
1912 CHREC_RIGHT (chrec_b));
1913 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1914 *last_conflicts = integer_one_node;
1915
1916
1917 /* Perform weak-zero siv test to see if overlap is
1918 outside the loop bounds. */
1919 numiter = max_stmt_executions_int (loop);
1920
1921 if (numiter >= 0
1922 && compare_tree_int (tmp, numiter) > 0)
1923 {
1924 free_conflict_function (*overlaps_a);
1925 free_conflict_function (*overlaps_b);
1926 *overlaps_a = conflict_fn_no_dependence ();
1927 *overlaps_b = conflict_fn_no_dependence ();
1928 *last_conflicts = integer_zero_node;
1929 dependence_stats.num_siv_independent++;
1930 return;
1931 }
1932 dependence_stats.num_siv_dependent++;
1933 return;
1934 }
1935
1936 /* When the step does not divide the difference, there are
1937 no overlaps. */
1938 else
1939 {
1940 *overlaps_a = conflict_fn_no_dependence ();
1941 *overlaps_b = conflict_fn_no_dependence ();
1942 *last_conflicts = integer_zero_node;
1943 dependence_stats.num_siv_independent++;
1944 return;
1945 }
1946 }
1947
1948 else
1949 {
1950 /* Example:
1951 chrec_a = 12
1952 chrec_b = {10, +, -1}
1953
1954 In this case, chrec_a will not overlap with chrec_b. */
1955 *overlaps_a = conflict_fn_no_dependence ();
1956 *overlaps_b = conflict_fn_no_dependence ();
1957 *last_conflicts = integer_zero_node;
1958 dependence_stats.num_siv_independent++;
1959 return;
1960 }
1961 }
1962 }
1963 else
1964 {
1965 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1966 {
1967 if (dump_file && (dump_flags & TDF_DETAILS))
1968 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1969
1970 *overlaps_a = conflict_fn_not_known ();
1971 *overlaps_b = conflict_fn_not_known ();
1972 *last_conflicts = chrec_dont_know;
1973 dependence_stats.num_siv_unimplemented++;
1974 return;
1975 }
1976 else
1977 {
1978 if (value2 == false)
1979 {
1980 /* Example:
1981 chrec_a = 3
1982 chrec_b = {10, +, -1}
1983 */
1984 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1985 {
1986 HOST_WIDE_INT numiter;
1987 struct loop *loop = get_chrec_loop (chrec_b);
1988
1989 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1990 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1991 CHREC_RIGHT (chrec_b));
1992 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1993 *last_conflicts = integer_one_node;
1994
1995 /* Perform weak-zero siv test to see if overlap is
1996 outside the loop bounds. */
1997 numiter = max_stmt_executions_int (loop);
1998
1999 if (numiter >= 0
2000 && compare_tree_int (tmp, numiter) > 0)
2001 {
2002 free_conflict_function (*overlaps_a);
2003 free_conflict_function (*overlaps_b);
2004 *overlaps_a = conflict_fn_no_dependence ();
2005 *overlaps_b = conflict_fn_no_dependence ();
2006 *last_conflicts = integer_zero_node;
2007 dependence_stats.num_siv_independent++;
2008 return;
2009 }
2010 dependence_stats.num_siv_dependent++;
2011 return;
2012 }
2013
2014 /* When the step does not divide the difference, there
2015 are no overlaps. */
2016 else
2017 {
2018 *overlaps_a = conflict_fn_no_dependence ();
2019 *overlaps_b = conflict_fn_no_dependence ();
2020 *last_conflicts = integer_zero_node;
2021 dependence_stats.num_siv_independent++;
2022 return;
2023 }
2024 }
2025 else
2026 {
2027 /* Example:
2028 chrec_a = 3
2029 chrec_b = {4, +, 1}
2030
2031 In this case, chrec_a will not overlap with chrec_b. */
2032 *overlaps_a = conflict_fn_no_dependence ();
2033 *overlaps_b = conflict_fn_no_dependence ();
2034 *last_conflicts = integer_zero_node;
2035 dependence_stats.num_siv_independent++;
2036 return;
2037 }
2038 }
2039 }
2040 }
2041 }
2042
2043 /* Helper recursive function for initializing the matrix A. Returns
2044 the initial value of CHREC. */
2045
2046 static tree
2047 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2048 {
2049 gcc_assert (chrec);
2050
2051 switch (TREE_CODE (chrec))
2052 {
2053 case POLYNOMIAL_CHREC:
2054 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2055
2056 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2057 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2058
2059 case PLUS_EXPR:
2060 case MULT_EXPR:
2061 case MINUS_EXPR:
2062 {
2063 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2064 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2065
2066 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2067 }
2068
2069 case NOP_EXPR:
2070 {
2071 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2072 return chrec_convert (chrec_type (chrec), op, NULL);
2073 }
2074
2075 case BIT_NOT_EXPR:
2076 {
2077 /* Handle ~X as -1 - X. */
2078 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2079 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2080 build_int_cst (TREE_TYPE (chrec), -1), op);
2081 }
2082
2083 case INTEGER_CST:
2084 return chrec;
2085
2086 default:
2087 gcc_unreachable ();
2088 return NULL_TREE;
2089 }
2090 }
2091
2092 #define FLOOR_DIV(x,y) ((x) / (y))
2093
2094 /* Solves the special case of the Diophantine equation:
2095 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2096
2097 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2098 number of iterations that loops X and Y run. The overlaps will be
2099 constructed as evolutions in dimension DIM. */
2100
2101 static void
2102 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2103 affine_fn *overlaps_a,
2104 affine_fn *overlaps_b,
2105 tree *last_conflicts, int dim)
2106 {
2107 if (((step_a > 0 && step_b > 0)
2108 || (step_a < 0 && step_b < 0)))
2109 {
2110 int step_overlaps_a, step_overlaps_b;
2111 int gcd_steps_a_b, last_conflict, tau2;
2112
2113 gcd_steps_a_b = gcd (step_a, step_b);
2114 step_overlaps_a = step_b / gcd_steps_a_b;
2115 step_overlaps_b = step_a / gcd_steps_a_b;
2116
2117 if (niter > 0)
2118 {
2119 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2120 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2121 last_conflict = tau2;
2122 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2123 }
2124 else
2125 *last_conflicts = chrec_dont_know;
2126
2127 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2128 build_int_cst (NULL_TREE,
2129 step_overlaps_a));
2130 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2131 build_int_cst (NULL_TREE,
2132 step_overlaps_b));
2133 }
2134
2135 else
2136 {
2137 *overlaps_a = affine_fn_cst (integer_zero_node);
2138 *overlaps_b = affine_fn_cst (integer_zero_node);
2139 *last_conflicts = integer_zero_node;
2140 }
2141 }
2142
2143 /* Solves the special case of a Diophantine equation where CHREC_A is
2144 an affine bivariate function, and CHREC_B is an affine univariate
2145 function. For example,
2146
2147 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2148
2149 has the following overlapping functions:
2150
2151 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2152 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2153 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2154
2155 FORNOW: This is a specialized implementation for a case occurring in
2156 a common benchmark. Implement the general algorithm. */
2157
2158 static void
2159 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2160 conflict_function **overlaps_a,
2161 conflict_function **overlaps_b,
2162 tree *last_conflicts)
2163 {
2164 bool xz_p, yz_p, xyz_p;
2165 int step_x, step_y, step_z;
2166 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2167 affine_fn overlaps_a_xz, overlaps_b_xz;
2168 affine_fn overlaps_a_yz, overlaps_b_yz;
2169 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2170 affine_fn ova1, ova2, ovb;
2171 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2172
2173 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2174 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2175 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2176
2177 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2178 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2179 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2180
2181 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2182 {
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2185
2186 *overlaps_a = conflict_fn_not_known ();
2187 *overlaps_b = conflict_fn_not_known ();
2188 *last_conflicts = chrec_dont_know;
2189 return;
2190 }
2191
2192 niter = MIN (niter_x, niter_z);
2193 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2194 &overlaps_a_xz,
2195 &overlaps_b_xz,
2196 &last_conflicts_xz, 1);
2197 niter = MIN (niter_y, niter_z);
2198 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2199 &overlaps_a_yz,
2200 &overlaps_b_yz,
2201 &last_conflicts_yz, 2);
2202 niter = MIN (niter_x, niter_z);
2203 niter = MIN (niter_y, niter);
2204 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2205 &overlaps_a_xyz,
2206 &overlaps_b_xyz,
2207 &last_conflicts_xyz, 3);
2208
2209 xz_p = !integer_zerop (last_conflicts_xz);
2210 yz_p = !integer_zerop (last_conflicts_yz);
2211 xyz_p = !integer_zerop (last_conflicts_xyz);
2212
2213 if (xz_p || yz_p || xyz_p)
2214 {
2215 ova1 = affine_fn_cst (integer_zero_node);
2216 ova2 = affine_fn_cst (integer_zero_node);
2217 ovb = affine_fn_cst (integer_zero_node);
2218 if (xz_p)
2219 {
2220 affine_fn t0 = ova1;
2221 affine_fn t2 = ovb;
2222
2223 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2224 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2225 affine_fn_free (t0);
2226 affine_fn_free (t2);
2227 *last_conflicts = last_conflicts_xz;
2228 }
2229 if (yz_p)
2230 {
2231 affine_fn t0 = ova2;
2232 affine_fn t2 = ovb;
2233
2234 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2235 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2236 affine_fn_free (t0);
2237 affine_fn_free (t2);
2238 *last_conflicts = last_conflicts_yz;
2239 }
2240 if (xyz_p)
2241 {
2242 affine_fn t0 = ova1;
2243 affine_fn t2 = ova2;
2244 affine_fn t4 = ovb;
2245
2246 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2247 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2248 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2249 affine_fn_free (t0);
2250 affine_fn_free (t2);
2251 affine_fn_free (t4);
2252 *last_conflicts = last_conflicts_xyz;
2253 }
2254 *overlaps_a = conflict_fn (2, ova1, ova2);
2255 *overlaps_b = conflict_fn (1, ovb);
2256 }
2257 else
2258 {
2259 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2260 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2261 *last_conflicts = integer_zero_node;
2262 }
2263
2264 affine_fn_free (overlaps_a_xz);
2265 affine_fn_free (overlaps_b_xz);
2266 affine_fn_free (overlaps_a_yz);
2267 affine_fn_free (overlaps_b_yz);
2268 affine_fn_free (overlaps_a_xyz);
2269 affine_fn_free (overlaps_b_xyz);
2270 }
2271
2272 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2273
2274 static void
2275 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2276 int size)
2277 {
2278 memcpy (vec2, vec1, size * sizeof (*vec1));
2279 }
2280
2281 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2282
2283 static void
2284 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2285 int m, int n)
2286 {
2287 int i;
2288
2289 for (i = 0; i < m; i++)
2290 lambda_vector_copy (mat1[i], mat2[i], n);
2291 }
2292
2293 /* Store the N x N identity matrix in MAT. */
2294
2295 static void
2296 lambda_matrix_id (lambda_matrix mat, int size)
2297 {
2298 int i, j;
2299
2300 for (i = 0; i < size; i++)
2301 for (j = 0; j < size; j++)
2302 mat[i][j] = (i == j) ? 1 : 0;
2303 }
2304
2305 /* Return the first nonzero element of vector VEC1 between START and N.
2306 We must have START <= N. Returns N if VEC1 is the zero vector. */
2307
2308 static int
2309 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2310 {
2311 int j = start;
2312 while (j < n && vec1[j] == 0)
2313 j++;
2314 return j;
2315 }
2316
2317 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2318 R2 = R2 + CONST1 * R1. */
2319
2320 static void
2321 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2322 {
2323 int i;
2324
2325 if (const1 == 0)
2326 return;
2327
2328 for (i = 0; i < n; i++)
2329 mat[r2][i] += const1 * mat[r1][i];
2330 }
2331
2332 /* Swap rows R1 and R2 in matrix MAT. */
2333
2334 static void
2335 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2336 {
2337 lambda_vector row;
2338
2339 row = mat[r1];
2340 mat[r1] = mat[r2];
2341 mat[r2] = row;
2342 }
2343
2344 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2345 and store the result in VEC2. */
2346
2347 static void
2348 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2349 int size, int const1)
2350 {
2351 int i;
2352
2353 if (const1 == 0)
2354 lambda_vector_clear (vec2, size);
2355 else
2356 for (i = 0; i < size; i++)
2357 vec2[i] = const1 * vec1[i];
2358 }
2359
2360 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2361
2362 static void
2363 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2364 int size)
2365 {
2366 lambda_vector_mult_const (vec1, vec2, size, -1);
2367 }
2368
2369 /* Negate row R1 of matrix MAT which has N columns. */
2370
2371 static void
2372 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2373 {
2374 lambda_vector_negate (mat[r1], mat[r1], n);
2375 }
2376
2377 /* Return true if two vectors are equal. */
2378
2379 static bool
2380 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2381 {
2382 int i;
2383 for (i = 0; i < size; i++)
2384 if (vec1[i] != vec2[i])
2385 return false;
2386 return true;
2387 }
2388
2389 /* Given an M x N integer matrix A, this function determines an M x
2390 M unimodular matrix U, and an M x N echelon matrix S such that
2391 "U.A = S". This decomposition is also known as "right Hermite".
2392
2393 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2394 Restructuring Compilers" Utpal Banerjee. */
2395
2396 static void
2397 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2398 lambda_matrix S, lambda_matrix U)
2399 {
2400 int i, j, i0 = 0;
2401
2402 lambda_matrix_copy (A, S, m, n);
2403 lambda_matrix_id (U, m);
2404
2405 for (j = 0; j < n; j++)
2406 {
2407 if (lambda_vector_first_nz (S[j], m, i0) < m)
2408 {
2409 ++i0;
2410 for (i = m - 1; i >= i0; i--)
2411 {
2412 while (S[i][j] != 0)
2413 {
2414 int sigma, factor, a, b;
2415
2416 a = S[i-1][j];
2417 b = S[i][j];
2418 sigma = (a * b < 0) ? -1: 1;
2419 a = abs (a);
2420 b = abs (b);
2421 factor = sigma * (a / b);
2422
2423 lambda_matrix_row_add (S, n, i, i-1, -factor);
2424 lambda_matrix_row_exchange (S, i, i-1);
2425
2426 lambda_matrix_row_add (U, m, i, i-1, -factor);
2427 lambda_matrix_row_exchange (U, i, i-1);
2428 }
2429 }
2430 }
2431 }
2432 }
2433
2434 /* Determines the overlapping elements due to accesses CHREC_A and
2435 CHREC_B, that are affine functions. This function cannot handle
2436 symbolic evolution functions, ie. when initial conditions are
2437 parameters, because it uses lambda matrices of integers. */
2438
2439 static void
2440 analyze_subscript_affine_affine (tree chrec_a,
2441 tree chrec_b,
2442 conflict_function **overlaps_a,
2443 conflict_function **overlaps_b,
2444 tree *last_conflicts)
2445 {
2446 unsigned nb_vars_a, nb_vars_b, dim;
2447 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2448 lambda_matrix A, U, S;
2449 struct obstack scratch_obstack;
2450
2451 if (eq_evolutions_p (chrec_a, chrec_b))
2452 {
2453 /* The accessed index overlaps for each iteration in the
2454 loop. */
2455 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2456 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2457 *last_conflicts = chrec_dont_know;
2458 return;
2459 }
2460 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2462
2463 /* For determining the initial intersection, we have to solve a
2464 Diophantine equation. This is the most time consuming part.
2465
2466 For answering to the question: "Is there a dependence?" we have
2467 to prove that there exists a solution to the Diophantine
2468 equation, and that the solution is in the iteration domain,
2469 i.e. the solution is positive or zero, and that the solution
2470 happens before the upper bound loop.nb_iterations. Otherwise
2471 there is no dependence. This function outputs a description of
2472 the iterations that hold the intersections. */
2473
2474 nb_vars_a = nb_vars_in_chrec (chrec_a);
2475 nb_vars_b = nb_vars_in_chrec (chrec_b);
2476
2477 gcc_obstack_init (&scratch_obstack);
2478
2479 dim = nb_vars_a + nb_vars_b;
2480 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2481 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2482 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2483
2484 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2485 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2486 gamma = init_b - init_a;
2487
2488 /* Don't do all the hard work of solving the Diophantine equation
2489 when we already know the solution: for example,
2490 | {3, +, 1}_1
2491 | {3, +, 4}_2
2492 | gamma = 3 - 3 = 0.
2493 Then the first overlap occurs during the first iterations:
2494 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2495 */
2496 if (gamma == 0)
2497 {
2498 if (nb_vars_a == 1 && nb_vars_b == 1)
2499 {
2500 HOST_WIDE_INT step_a, step_b;
2501 HOST_WIDE_INT niter, niter_a, niter_b;
2502 affine_fn ova, ovb;
2503
2504 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2505 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2506 niter = MIN (niter_a, niter_b);
2507 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2508 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2509
2510 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2511 &ova, &ovb,
2512 last_conflicts, 1);
2513 *overlaps_a = conflict_fn (1, ova);
2514 *overlaps_b = conflict_fn (1, ovb);
2515 }
2516
2517 else if (nb_vars_a == 2 && nb_vars_b == 1)
2518 compute_overlap_steps_for_affine_1_2
2519 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2520
2521 else if (nb_vars_a == 1 && nb_vars_b == 2)
2522 compute_overlap_steps_for_affine_1_2
2523 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2524
2525 else
2526 {
2527 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2529 *overlaps_a = conflict_fn_not_known ();
2530 *overlaps_b = conflict_fn_not_known ();
2531 *last_conflicts = chrec_dont_know;
2532 }
2533 goto end_analyze_subs_aa;
2534 }
2535
2536 /* U.A = S */
2537 lambda_matrix_right_hermite (A, dim, 1, S, U);
2538
2539 if (S[0][0] < 0)
2540 {
2541 S[0][0] *= -1;
2542 lambda_matrix_row_negate (U, dim, 0);
2543 }
2544 gcd_alpha_beta = S[0][0];
2545
2546 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2547 but that is a quite strange case. Instead of ICEing, answer
2548 don't know. */
2549 if (gcd_alpha_beta == 0)
2550 {
2551 *overlaps_a = conflict_fn_not_known ();
2552 *overlaps_b = conflict_fn_not_known ();
2553 *last_conflicts = chrec_dont_know;
2554 goto end_analyze_subs_aa;
2555 }
2556
2557 /* The classic "gcd-test". */
2558 if (!int_divides_p (gcd_alpha_beta, gamma))
2559 {
2560 /* The "gcd-test" has determined that there is no integer
2561 solution, i.e. there is no dependence. */
2562 *overlaps_a = conflict_fn_no_dependence ();
2563 *overlaps_b = conflict_fn_no_dependence ();
2564 *last_conflicts = integer_zero_node;
2565 }
2566
2567 /* Both access functions are univariate. This includes SIV and MIV cases. */
2568 else if (nb_vars_a == 1 && nb_vars_b == 1)
2569 {
2570 /* Both functions should have the same evolution sign. */
2571 if (((A[0][0] > 0 && -A[1][0] > 0)
2572 || (A[0][0] < 0 && -A[1][0] < 0)))
2573 {
2574 /* The solutions are given by:
2575 |
2576 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2577 | [u21 u22] [y0]
2578
2579 For a given integer t. Using the following variables,
2580
2581 | i0 = u11 * gamma / gcd_alpha_beta
2582 | j0 = u12 * gamma / gcd_alpha_beta
2583 | i1 = u21
2584 | j1 = u22
2585
2586 the solutions are:
2587
2588 | x0 = i0 + i1 * t,
2589 | y0 = j0 + j1 * t. */
2590 HOST_WIDE_INT i0, j0, i1, j1;
2591
2592 i0 = U[0][0] * gamma / gcd_alpha_beta;
2593 j0 = U[0][1] * gamma / gcd_alpha_beta;
2594 i1 = U[1][0];
2595 j1 = U[1][1];
2596
2597 if ((i1 == 0 && i0 < 0)
2598 || (j1 == 0 && j0 < 0))
2599 {
2600 /* There is no solution.
2601 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2602 falls in here, but for the moment we don't look at the
2603 upper bound of the iteration domain. */
2604 *overlaps_a = conflict_fn_no_dependence ();
2605 *overlaps_b = conflict_fn_no_dependence ();
2606 *last_conflicts = integer_zero_node;
2607 goto end_analyze_subs_aa;
2608 }
2609
2610 if (i1 > 0 && j1 > 0)
2611 {
2612 HOST_WIDE_INT niter_a
2613 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2614 HOST_WIDE_INT niter_b
2615 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2616 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2617
2618 /* (X0, Y0) is a solution of the Diophantine equation:
2619 "chrec_a (X0) = chrec_b (Y0)". */
2620 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2621 CEIL (-j0, j1));
2622 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2623 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2624
2625 /* (X1, Y1) is the smallest positive solution of the eq
2626 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2627 first conflict occurs. */
2628 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2629 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2630 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2631
2632 if (niter > 0)
2633 {
2634 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2635 FLOOR_DIV (niter - j0, j1));
2636 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2637
2638 /* If the overlap occurs outside of the bounds of the
2639 loop, there is no dependence. */
2640 if (x1 >= niter || y1 >= niter)
2641 {
2642 *overlaps_a = conflict_fn_no_dependence ();
2643 *overlaps_b = conflict_fn_no_dependence ();
2644 *last_conflicts = integer_zero_node;
2645 goto end_analyze_subs_aa;
2646 }
2647 else
2648 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2649 }
2650 else
2651 *last_conflicts = chrec_dont_know;
2652
2653 *overlaps_a
2654 = conflict_fn (1,
2655 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2656 1,
2657 build_int_cst (NULL_TREE, i1)));
2658 *overlaps_b
2659 = conflict_fn (1,
2660 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2661 1,
2662 build_int_cst (NULL_TREE, j1)));
2663 }
2664 else
2665 {
2666 /* FIXME: For the moment, the upper bound of the
2667 iteration domain for i and j is not checked. */
2668 if (dump_file && (dump_flags & TDF_DETAILS))
2669 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2670 *overlaps_a = conflict_fn_not_known ();
2671 *overlaps_b = conflict_fn_not_known ();
2672 *last_conflicts = chrec_dont_know;
2673 }
2674 }
2675 else
2676 {
2677 if (dump_file && (dump_flags & TDF_DETAILS))
2678 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2679 *overlaps_a = conflict_fn_not_known ();
2680 *overlaps_b = conflict_fn_not_known ();
2681 *last_conflicts = chrec_dont_know;
2682 }
2683 }
2684 else
2685 {
2686 if (dump_file && (dump_flags & TDF_DETAILS))
2687 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2688 *overlaps_a = conflict_fn_not_known ();
2689 *overlaps_b = conflict_fn_not_known ();
2690 *last_conflicts = chrec_dont_know;
2691 }
2692
2693 end_analyze_subs_aa:
2694 obstack_free (&scratch_obstack, NULL);
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 {
2697 fprintf (dump_file, " (overlaps_a = ");
2698 dump_conflict_function (dump_file, *overlaps_a);
2699 fprintf (dump_file, ")\n (overlaps_b = ");
2700 dump_conflict_function (dump_file, *overlaps_b);
2701 fprintf (dump_file, "))\n");
2702 }
2703 }
2704
2705 /* Returns true when analyze_subscript_affine_affine can be used for
2706 determining the dependence relation between chrec_a and chrec_b,
2707 that contain symbols. This function modifies chrec_a and chrec_b
2708 such that the analysis result is the same, and such that they don't
2709 contain symbols, and then can safely be passed to the analyzer.
2710
2711 Example: The analysis of the following tuples of evolutions produce
2712 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2713 vs. {0, +, 1}_1
2714
2715 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2716 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2717 */
2718
2719 static bool
2720 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2721 {
2722 tree diff, type, left_a, left_b, right_b;
2723
2724 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2725 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2726 /* FIXME: For the moment not handled. Might be refined later. */
2727 return false;
2728
2729 type = chrec_type (*chrec_a);
2730 left_a = CHREC_LEFT (*chrec_a);
2731 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2732 diff = chrec_fold_minus (type, left_a, left_b);
2733
2734 if (!evolution_function_is_constant_p (diff))
2735 return false;
2736
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2739
2740 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2741 diff, CHREC_RIGHT (*chrec_a));
2742 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2743 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2744 build_int_cst (type, 0),
2745 right_b);
2746 return true;
2747 }
2748
2749 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2750 *OVERLAPS_B are initialized to the functions that describe the
2751 relation between the elements accessed twice by CHREC_A and
2752 CHREC_B. For k >= 0, the following property is verified:
2753
2754 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2755
2756 static void
2757 analyze_siv_subscript (tree chrec_a,
2758 tree chrec_b,
2759 conflict_function **overlaps_a,
2760 conflict_function **overlaps_b,
2761 tree *last_conflicts,
2762 int loop_nest_num)
2763 {
2764 dependence_stats.num_siv++;
2765
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 fprintf (dump_file, "(analyze_siv_subscript \n");
2768
2769 if (evolution_function_is_constant_p (chrec_a)
2770 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2771 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2772 overlaps_a, overlaps_b, last_conflicts);
2773
2774 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2775 && evolution_function_is_constant_p (chrec_b))
2776 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2777 overlaps_b, overlaps_a, last_conflicts);
2778
2779 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2780 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2781 {
2782 if (!chrec_contains_symbols (chrec_a)
2783 && !chrec_contains_symbols (chrec_b))
2784 {
2785 analyze_subscript_affine_affine (chrec_a, chrec_b,
2786 overlaps_a, overlaps_b,
2787 last_conflicts);
2788
2789 if (CF_NOT_KNOWN_P (*overlaps_a)
2790 || CF_NOT_KNOWN_P (*overlaps_b))
2791 dependence_stats.num_siv_unimplemented++;
2792 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2793 || CF_NO_DEPENDENCE_P (*overlaps_b))
2794 dependence_stats.num_siv_independent++;
2795 else
2796 dependence_stats.num_siv_dependent++;
2797 }
2798 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2799 &chrec_b))
2800 {
2801 analyze_subscript_affine_affine (chrec_a, chrec_b,
2802 overlaps_a, overlaps_b,
2803 last_conflicts);
2804
2805 if (CF_NOT_KNOWN_P (*overlaps_a)
2806 || CF_NOT_KNOWN_P (*overlaps_b))
2807 dependence_stats.num_siv_unimplemented++;
2808 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2809 || CF_NO_DEPENDENCE_P (*overlaps_b))
2810 dependence_stats.num_siv_independent++;
2811 else
2812 dependence_stats.num_siv_dependent++;
2813 }
2814 else
2815 goto siv_subscript_dontknow;
2816 }
2817
2818 else
2819 {
2820 siv_subscript_dontknow:;
2821 if (dump_file && (dump_flags & TDF_DETAILS))
2822 fprintf (dump_file, " siv test failed: unimplemented");
2823 *overlaps_a = conflict_fn_not_known ();
2824 *overlaps_b = conflict_fn_not_known ();
2825 *last_conflicts = chrec_dont_know;
2826 dependence_stats.num_siv_unimplemented++;
2827 }
2828
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2830 fprintf (dump_file, ")\n");
2831 }
2832
2833 /* Returns false if we can prove that the greatest common divisor of the steps
2834 of CHREC does not divide CST, false otherwise. */
2835
2836 static bool
2837 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2838 {
2839 HOST_WIDE_INT cd = 0, val;
2840 tree step;
2841
2842 if (!tree_fits_shwi_p (cst))
2843 return true;
2844 val = tree_to_shwi (cst);
2845
2846 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2847 {
2848 step = CHREC_RIGHT (chrec);
2849 if (!tree_fits_shwi_p (step))
2850 return true;
2851 cd = gcd (cd, tree_to_shwi (step));
2852 chrec = CHREC_LEFT (chrec);
2853 }
2854
2855 return val % cd == 0;
2856 }
2857
2858 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2859 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2860 functions that describe the relation between the elements accessed
2861 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2862 is verified:
2863
2864 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2865
2866 static void
2867 analyze_miv_subscript (tree chrec_a,
2868 tree chrec_b,
2869 conflict_function **overlaps_a,
2870 conflict_function **overlaps_b,
2871 tree *last_conflicts,
2872 struct loop *loop_nest)
2873 {
2874 tree type, difference;
2875
2876 dependence_stats.num_miv++;
2877 if (dump_file && (dump_flags & TDF_DETAILS))
2878 fprintf (dump_file, "(analyze_miv_subscript \n");
2879
2880 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2881 chrec_a = chrec_convert (type, chrec_a, NULL);
2882 chrec_b = chrec_convert (type, chrec_b, NULL);
2883 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2884
2885 if (eq_evolutions_p (chrec_a, chrec_b))
2886 {
2887 /* Access functions are the same: all the elements are accessed
2888 in the same order. */
2889 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2890 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2891 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2892 dependence_stats.num_miv_dependent++;
2893 }
2894
2895 else if (evolution_function_is_constant_p (difference)
2896 /* For the moment, the following is verified:
2897 evolution_function_is_affine_multivariate_p (chrec_a,
2898 loop_nest->num) */
2899 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2900 {
2901 /* testsuite/.../ssa-chrec-33.c
2902 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2903
2904 The difference is 1, and all the evolution steps are multiples
2905 of 2, consequently there are no overlapping elements. */
2906 *overlaps_a = conflict_fn_no_dependence ();
2907 *overlaps_b = conflict_fn_no_dependence ();
2908 *last_conflicts = integer_zero_node;
2909 dependence_stats.num_miv_independent++;
2910 }
2911
2912 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2913 && !chrec_contains_symbols (chrec_a)
2914 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2915 && !chrec_contains_symbols (chrec_b))
2916 {
2917 /* testsuite/.../ssa-chrec-35.c
2918 {0, +, 1}_2 vs. {0, +, 1}_3
2919 the overlapping elements are respectively located at iterations:
2920 {0, +, 1}_x and {0, +, 1}_x,
2921 in other words, we have the equality:
2922 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2923
2924 Other examples:
2925 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2926 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2927
2928 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2929 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2930 */
2931 analyze_subscript_affine_affine (chrec_a, chrec_b,
2932 overlaps_a, overlaps_b, last_conflicts);
2933
2934 if (CF_NOT_KNOWN_P (*overlaps_a)
2935 || CF_NOT_KNOWN_P (*overlaps_b))
2936 dependence_stats.num_miv_unimplemented++;
2937 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2938 || CF_NO_DEPENDENCE_P (*overlaps_b))
2939 dependence_stats.num_miv_independent++;
2940 else
2941 dependence_stats.num_miv_dependent++;
2942 }
2943
2944 else
2945 {
2946 /* When the analysis is too difficult, answer "don't know". */
2947 if (dump_file && (dump_flags & TDF_DETAILS))
2948 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2949
2950 *overlaps_a = conflict_fn_not_known ();
2951 *overlaps_b = conflict_fn_not_known ();
2952 *last_conflicts = chrec_dont_know;
2953 dependence_stats.num_miv_unimplemented++;
2954 }
2955
2956 if (dump_file && (dump_flags & TDF_DETAILS))
2957 fprintf (dump_file, ")\n");
2958 }
2959
2960 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2961 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2962 OVERLAP_ITERATIONS_B are initialized with two functions that
2963 describe the iterations that contain conflicting elements.
2964
2965 Remark: For an integer k >= 0, the following equality is true:
2966
2967 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2968 */
2969
2970 static void
2971 analyze_overlapping_iterations (tree chrec_a,
2972 tree chrec_b,
2973 conflict_function **overlap_iterations_a,
2974 conflict_function **overlap_iterations_b,
2975 tree *last_conflicts, struct loop *loop_nest)
2976 {
2977 unsigned int lnn = loop_nest->num;
2978
2979 dependence_stats.num_subscript_tests++;
2980
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2982 {
2983 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2984 fprintf (dump_file, " (chrec_a = ");
2985 print_generic_expr (dump_file, chrec_a, 0);
2986 fprintf (dump_file, ")\n (chrec_b = ");
2987 print_generic_expr (dump_file, chrec_b, 0);
2988 fprintf (dump_file, ")\n");
2989 }
2990
2991 if (chrec_a == NULL_TREE
2992 || chrec_b == NULL_TREE
2993 || chrec_contains_undetermined (chrec_a)
2994 || chrec_contains_undetermined (chrec_b))
2995 {
2996 dependence_stats.num_subscript_undetermined++;
2997
2998 *overlap_iterations_a = conflict_fn_not_known ();
2999 *overlap_iterations_b = conflict_fn_not_known ();
3000 }
3001
3002 /* If they are the same chrec, and are affine, they overlap
3003 on every iteration. */
3004 else if (eq_evolutions_p (chrec_a, chrec_b)
3005 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3006 || operand_equal_p (chrec_a, chrec_b, 0)))
3007 {
3008 dependence_stats.num_same_subscript_function++;
3009 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3010 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3011 *last_conflicts = chrec_dont_know;
3012 }
3013
3014 /* If they aren't the same, and aren't affine, we can't do anything
3015 yet. */
3016 else if ((chrec_contains_symbols (chrec_a)
3017 || chrec_contains_symbols (chrec_b))
3018 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3019 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3020 {
3021 dependence_stats.num_subscript_undetermined++;
3022 *overlap_iterations_a = conflict_fn_not_known ();
3023 *overlap_iterations_b = conflict_fn_not_known ();
3024 }
3025
3026 else if (ziv_subscript_p (chrec_a, chrec_b))
3027 analyze_ziv_subscript (chrec_a, chrec_b,
3028 overlap_iterations_a, overlap_iterations_b,
3029 last_conflicts);
3030
3031 else if (siv_subscript_p (chrec_a, chrec_b))
3032 analyze_siv_subscript (chrec_a, chrec_b,
3033 overlap_iterations_a, overlap_iterations_b,
3034 last_conflicts, lnn);
3035
3036 else
3037 analyze_miv_subscript (chrec_a, chrec_b,
3038 overlap_iterations_a, overlap_iterations_b,
3039 last_conflicts, loop_nest);
3040
3041 if (dump_file && (dump_flags & TDF_DETAILS))
3042 {
3043 fprintf (dump_file, " (overlap_iterations_a = ");
3044 dump_conflict_function (dump_file, *overlap_iterations_a);
3045 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3046 dump_conflict_function (dump_file, *overlap_iterations_b);
3047 fprintf (dump_file, "))\n");
3048 }
3049 }
3050
3051 /* Helper function for uniquely inserting distance vectors. */
3052
3053 static void
3054 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3055 {
3056 unsigned i;
3057 lambda_vector v;
3058
3059 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3060 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3061 return;
3062
3063 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3064 }
3065
3066 /* Helper function for uniquely inserting direction vectors. */
3067
3068 static void
3069 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3070 {
3071 unsigned i;
3072 lambda_vector v;
3073
3074 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3075 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3076 return;
3077
3078 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3079 }
3080
3081 /* Add a distance of 1 on all the loops outer than INDEX. If we
3082 haven't yet determined a distance for this outer loop, push a new
3083 distance vector composed of the previous distance, and a distance
3084 of 1 for this outer loop. Example:
3085
3086 | loop_1
3087 | loop_2
3088 | A[10]
3089 | endloop_2
3090 | endloop_1
3091
3092 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3093 save (0, 1), then we have to save (1, 0). */
3094
3095 static void
3096 add_outer_distances (struct data_dependence_relation *ddr,
3097 lambda_vector dist_v, int index)
3098 {
3099 /* For each outer loop where init_v is not set, the accesses are
3100 in dependence of distance 1 in the loop. */
3101 while (--index >= 0)
3102 {
3103 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3104 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3105 save_v[index] = 1;
3106 save_dist_v (ddr, save_v);
3107 }
3108 }
3109
3110 /* Return false when fail to represent the data dependence as a
3111 distance vector. INIT_B is set to true when a component has been
3112 added to the distance vector DIST_V. INDEX_CARRY is then set to
3113 the index in DIST_V that carries the dependence. */
3114
3115 static bool
3116 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3117 struct data_reference *ddr_a,
3118 struct data_reference *ddr_b,
3119 lambda_vector dist_v, bool *init_b,
3120 int *index_carry)
3121 {
3122 unsigned i;
3123 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3124
3125 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3126 {
3127 tree access_fn_a, access_fn_b;
3128 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3129
3130 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3131 {
3132 non_affine_dependence_relation (ddr);
3133 return false;
3134 }
3135
3136 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3137 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3138
3139 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3140 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3141 {
3142 int dist, index;
3143 int var_a = CHREC_VARIABLE (access_fn_a);
3144 int var_b = CHREC_VARIABLE (access_fn_b);
3145
3146 if (var_a != var_b
3147 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3148 {
3149 non_affine_dependence_relation (ddr);
3150 return false;
3151 }
3152
3153 dist = int_cst_value (SUB_DISTANCE (subscript));
3154 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3155 *index_carry = MIN (index, *index_carry);
3156
3157 /* This is the subscript coupling test. If we have already
3158 recorded a distance for this loop (a distance coming from
3159 another subscript), it should be the same. For example,
3160 in the following code, there is no dependence:
3161
3162 | loop i = 0, N, 1
3163 | T[i+1][i] = ...
3164 | ... = T[i][i]
3165 | endloop
3166 */
3167 if (init_v[index] != 0 && dist_v[index] != dist)
3168 {
3169 finalize_ddr_dependent (ddr, chrec_known);
3170 return false;
3171 }
3172
3173 dist_v[index] = dist;
3174 init_v[index] = 1;
3175 *init_b = true;
3176 }
3177 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3178 {
3179 /* This can be for example an affine vs. constant dependence
3180 (T[i] vs. T[3]) that is not an affine dependence and is
3181 not representable as a distance vector. */
3182 non_affine_dependence_relation (ddr);
3183 return false;
3184 }
3185 }
3186
3187 return true;
3188 }
3189
3190 /* Return true when the DDR contains only constant access functions. */
3191
3192 static bool
3193 constant_access_functions (const struct data_dependence_relation *ddr)
3194 {
3195 unsigned i;
3196
3197 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3198 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3199 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3200 return false;
3201
3202 return true;
3203 }
3204
3205 /* Helper function for the case where DDR_A and DDR_B are the same
3206 multivariate access function with a constant step. For an example
3207 see pr34635-1.c. */
3208
3209 static void
3210 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3211 {
3212 int x_1, x_2;
3213 tree c_1 = CHREC_LEFT (c_2);
3214 tree c_0 = CHREC_LEFT (c_1);
3215 lambda_vector dist_v;
3216 int v1, v2, cd;
3217
3218 /* Polynomials with more than 2 variables are not handled yet. When
3219 the evolution steps are parameters, it is not possible to
3220 represent the dependence using classical distance vectors. */
3221 if (TREE_CODE (c_0) != INTEGER_CST
3222 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3223 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3224 {
3225 DDR_AFFINE_P (ddr) = false;
3226 return;
3227 }
3228
3229 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3230 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3231
3232 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3233 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3234 v1 = int_cst_value (CHREC_RIGHT (c_1));
3235 v2 = int_cst_value (CHREC_RIGHT (c_2));
3236 cd = gcd (v1, v2);
3237 v1 /= cd;
3238 v2 /= cd;
3239
3240 if (v2 < 0)
3241 {
3242 v2 = -v2;
3243 v1 = -v1;
3244 }
3245
3246 dist_v[x_1] = v2;
3247 dist_v[x_2] = -v1;
3248 save_dist_v (ddr, dist_v);
3249
3250 add_outer_distances (ddr, dist_v, x_1);
3251 }
3252
3253 /* Helper function for the case where DDR_A and DDR_B are the same
3254 access functions. */
3255
3256 static void
3257 add_other_self_distances (struct data_dependence_relation *ddr)
3258 {
3259 lambda_vector dist_v;
3260 unsigned i;
3261 int index_carry = DDR_NB_LOOPS (ddr);
3262
3263 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3264 {
3265 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3266
3267 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3268 {
3269 if (!evolution_function_is_univariate_p (access_fun))
3270 {
3271 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3272 {
3273 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3274 return;
3275 }
3276
3277 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3278
3279 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3280 add_multivariate_self_dist (ddr, access_fun);
3281 else
3282 /* The evolution step is not constant: it varies in
3283 the outer loop, so this cannot be represented by a
3284 distance vector. For example in pr34635.c the
3285 evolution is {0, +, {0, +, 4}_1}_2. */
3286 DDR_AFFINE_P (ddr) = false;
3287
3288 return;
3289 }
3290
3291 index_carry = MIN (index_carry,
3292 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3293 DDR_LOOP_NEST (ddr)));
3294 }
3295 }
3296
3297 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3298 add_outer_distances (ddr, dist_v, index_carry);
3299 }
3300
3301 static void
3302 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3303 {
3304 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3305
3306 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3307 save_dist_v (ddr, dist_v);
3308 }
3309
3310 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3311 is the case for example when access functions are the same and
3312 equal to a constant, as in:
3313
3314 | loop_1
3315 | A[3] = ...
3316 | ... = A[3]
3317 | endloop_1
3318
3319 in which case the distance vectors are (0) and (1). */
3320
3321 static void
3322 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3323 {
3324 unsigned i, j;
3325
3326 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3327 {
3328 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3329 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3330 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3331
3332 for (j = 0; j < ca->n; j++)
3333 if (affine_function_zero_p (ca->fns[j]))
3334 {
3335 insert_innermost_unit_dist_vector (ddr);
3336 return;
3337 }
3338
3339 for (j = 0; j < cb->n; j++)
3340 if (affine_function_zero_p (cb->fns[j]))
3341 {
3342 insert_innermost_unit_dist_vector (ddr);
3343 return;
3344 }
3345 }
3346 }
3347
3348 /* Compute the classic per loop distance vector. DDR is the data
3349 dependence relation to build a vector from. Return false when fail
3350 to represent the data dependence as a distance vector. */
3351
3352 static bool
3353 build_classic_dist_vector (struct data_dependence_relation *ddr,
3354 struct loop *loop_nest)
3355 {
3356 bool init_b = false;
3357 int index_carry = DDR_NB_LOOPS (ddr);
3358 lambda_vector dist_v;
3359
3360 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3361 return false;
3362
3363 if (same_access_functions (ddr))
3364 {
3365 /* Save the 0 vector. */
3366 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3367 save_dist_v (ddr, dist_v);
3368
3369 if (constant_access_functions (ddr))
3370 add_distance_for_zero_overlaps (ddr);
3371
3372 if (DDR_NB_LOOPS (ddr) > 1)
3373 add_other_self_distances (ddr);
3374
3375 return true;
3376 }
3377
3378 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3379 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3380 dist_v, &init_b, &index_carry))
3381 return false;
3382
3383 /* Save the distance vector if we initialized one. */
3384 if (init_b)
3385 {
3386 /* Verify a basic constraint: classic distance vectors should
3387 always be lexicographically positive.
3388
3389 Data references are collected in the order of execution of
3390 the program, thus for the following loop
3391
3392 | for (i = 1; i < 100; i++)
3393 | for (j = 1; j < 100; j++)
3394 | {
3395 | t = T[j+1][i-1]; // A
3396 | T[j][i] = t + 2; // B
3397 | }
3398
3399 references are collected following the direction of the wind:
3400 A then B. The data dependence tests are performed also
3401 following this order, such that we're looking at the distance
3402 separating the elements accessed by A from the elements later
3403 accessed by B. But in this example, the distance returned by
3404 test_dep (A, B) is lexicographically negative (-1, 1), that
3405 means that the access A occurs later than B with respect to
3406 the outer loop, ie. we're actually looking upwind. In this
3407 case we solve test_dep (B, A) looking downwind to the
3408 lexicographically positive solution, that returns the
3409 distance vector (1, -1). */
3410 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3411 {
3412 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3413 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3414 loop_nest))
3415 return false;
3416 compute_subscript_distance (ddr);
3417 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3418 save_v, &init_b, &index_carry))
3419 return false;
3420 save_dist_v (ddr, save_v);
3421 DDR_REVERSED_P (ddr) = true;
3422
3423 /* In this case there is a dependence forward for all the
3424 outer loops:
3425
3426 | for (k = 1; k < 100; k++)
3427 | for (i = 1; i < 100; i++)
3428 | for (j = 1; j < 100; j++)
3429 | {
3430 | t = T[j+1][i-1]; // A
3431 | T[j][i] = t + 2; // B
3432 | }
3433
3434 the vectors are:
3435 (0, 1, -1)
3436 (1, 1, -1)
3437 (1, -1, 1)
3438 */
3439 if (DDR_NB_LOOPS (ddr) > 1)
3440 {
3441 add_outer_distances (ddr, save_v, index_carry);
3442 add_outer_distances (ddr, dist_v, index_carry);
3443 }
3444 }
3445 else
3446 {
3447 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3448 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3449
3450 if (DDR_NB_LOOPS (ddr) > 1)
3451 {
3452 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3453
3454 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3455 DDR_A (ddr), loop_nest))
3456 return false;
3457 compute_subscript_distance (ddr);
3458 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3459 opposite_v, &init_b,
3460 &index_carry))
3461 return false;
3462
3463 save_dist_v (ddr, save_v);
3464 add_outer_distances (ddr, dist_v, index_carry);
3465 add_outer_distances (ddr, opposite_v, index_carry);
3466 }
3467 else
3468 save_dist_v (ddr, save_v);
3469 }
3470 }
3471 else
3472 {
3473 /* There is a distance of 1 on all the outer loops: Example:
3474 there is a dependence of distance 1 on loop_1 for the array A.
3475
3476 | loop_1
3477 | A[5] = ...
3478 | endloop
3479 */
3480 add_outer_distances (ddr, dist_v,
3481 lambda_vector_first_nz (dist_v,
3482 DDR_NB_LOOPS (ddr), 0));
3483 }
3484
3485 if (dump_file && (dump_flags & TDF_DETAILS))
3486 {
3487 unsigned i;
3488
3489 fprintf (dump_file, "(build_classic_dist_vector\n");
3490 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3491 {
3492 fprintf (dump_file, " dist_vector = (");
3493 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3494 DDR_NB_LOOPS (ddr));
3495 fprintf (dump_file, " )\n");
3496 }
3497 fprintf (dump_file, ")\n");
3498 }
3499
3500 return true;
3501 }
3502
3503 /* Return the direction for a given distance.
3504 FIXME: Computing dir this way is suboptimal, since dir can catch
3505 cases that dist is unable to represent. */
3506
3507 static inline enum data_dependence_direction
3508 dir_from_dist (int dist)
3509 {
3510 if (dist > 0)
3511 return dir_positive;
3512 else if (dist < 0)
3513 return dir_negative;
3514 else
3515 return dir_equal;
3516 }
3517
3518 /* Compute the classic per loop direction vector. DDR is the data
3519 dependence relation to build a vector from. */
3520
3521 static void
3522 build_classic_dir_vector (struct data_dependence_relation *ddr)
3523 {
3524 unsigned i, j;
3525 lambda_vector dist_v;
3526
3527 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3528 {
3529 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3530
3531 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3532 dir_v[j] = dir_from_dist (dist_v[j]);
3533
3534 save_dir_v (ddr, dir_v);
3535 }
3536 }
3537
3538 /* Helper function. Returns true when there is a dependence between
3539 data references DRA and DRB. */
3540
3541 static bool
3542 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3543 struct data_reference *dra,
3544 struct data_reference *drb,
3545 struct loop *loop_nest)
3546 {
3547 unsigned int i;
3548 tree last_conflicts;
3549 struct subscript *subscript;
3550 tree res = NULL_TREE;
3551
3552 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3553 {
3554 conflict_function *overlaps_a, *overlaps_b;
3555
3556 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3557 DR_ACCESS_FN (drb, i),
3558 &overlaps_a, &overlaps_b,
3559 &last_conflicts, loop_nest);
3560
3561 if (SUB_CONFLICTS_IN_A (subscript))
3562 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3563 if (SUB_CONFLICTS_IN_B (subscript))
3564 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3565
3566 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3567 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3568 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3569
3570 /* If there is any undetermined conflict function we have to
3571 give a conservative answer in case we cannot prove that
3572 no dependence exists when analyzing another subscript. */
3573 if (CF_NOT_KNOWN_P (overlaps_a)
3574 || CF_NOT_KNOWN_P (overlaps_b))
3575 {
3576 res = chrec_dont_know;
3577 continue;
3578 }
3579
3580 /* When there is a subscript with no dependence we can stop. */
3581 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3582 || CF_NO_DEPENDENCE_P (overlaps_b))
3583 {
3584 res = chrec_known;
3585 break;
3586 }
3587 }
3588
3589 if (res == NULL_TREE)
3590 return true;
3591
3592 if (res == chrec_known)
3593 dependence_stats.num_dependence_independent++;
3594 else
3595 dependence_stats.num_dependence_undetermined++;
3596 finalize_ddr_dependent (ddr, res);
3597 return false;
3598 }
3599
3600 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3601
3602 static void
3603 subscript_dependence_tester (struct data_dependence_relation *ddr,
3604 struct loop *loop_nest)
3605 {
3606 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3607 dependence_stats.num_dependence_dependent++;
3608
3609 compute_subscript_distance (ddr);
3610 if (build_classic_dist_vector (ddr, loop_nest))
3611 build_classic_dir_vector (ddr);
3612 }
3613
3614 /* Returns true when all the access functions of A are affine or
3615 constant with respect to LOOP_NEST. */
3616
3617 static bool
3618 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3619 const struct loop *loop_nest)
3620 {
3621 unsigned int i;
3622 vec<tree> fns = DR_ACCESS_FNS (a);
3623 tree t;
3624
3625 FOR_EACH_VEC_ELT (fns, i, t)
3626 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3627 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3628 return false;
3629
3630 return true;
3631 }
3632
3633 /* Initializes an equation for an OMEGA problem using the information
3634 contained in the ACCESS_FUN. Returns true when the operation
3635 succeeded.
3636
3637 PB is the omega constraint system.
3638 EQ is the number of the equation to be initialized.
3639 OFFSET is used for shifting the variables names in the constraints:
3640 a constrain is composed of 2 * the number of variables surrounding
3641 dependence accesses. OFFSET is set either to 0 for the first n variables,
3642 then it is set to n.
3643 ACCESS_FUN is expected to be an affine chrec. */
3644
3645 static bool
3646 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3647 unsigned int offset, tree access_fun,
3648 struct data_dependence_relation *ddr)
3649 {
3650 switch (TREE_CODE (access_fun))
3651 {
3652 case POLYNOMIAL_CHREC:
3653 {
3654 tree left = CHREC_LEFT (access_fun);
3655 tree right = CHREC_RIGHT (access_fun);
3656 int var = CHREC_VARIABLE (access_fun);
3657 unsigned var_idx;
3658
3659 if (TREE_CODE (right) != INTEGER_CST)
3660 return false;
3661
3662 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3663 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3664
3665 /* Compute the innermost loop index. */
3666 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3667
3668 if (offset == 0)
3669 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3670 += int_cst_value (right);
3671
3672 switch (TREE_CODE (left))
3673 {
3674 case POLYNOMIAL_CHREC:
3675 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3676
3677 case INTEGER_CST:
3678 pb->eqs[eq].coef[0] += int_cst_value (left);
3679 return true;
3680
3681 default:
3682 return false;
3683 }
3684 }
3685
3686 case INTEGER_CST:
3687 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3688 return true;
3689
3690 default:
3691 return false;
3692 }
3693 }
3694
3695 /* As explained in the comments preceding init_omega_for_ddr, we have
3696 to set up a system for each loop level, setting outer loops
3697 variation to zero, and current loop variation to positive or zero.
3698 Save each lexico positive distance vector. */
3699
3700 static void
3701 omega_extract_distance_vectors (omega_pb pb,
3702 struct data_dependence_relation *ddr)
3703 {
3704 int eq, geq;
3705 unsigned i, j;
3706 struct loop *loopi, *loopj;
3707 enum omega_result res;
3708
3709 /* Set a new problem for each loop in the nest. The basis is the
3710 problem that we have initialized until now. On top of this we
3711 add new constraints. */
3712 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3713 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3714 {
3715 int dist = 0;
3716 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3717 DDR_NB_LOOPS (ddr));
3718
3719 omega_copy_problem (copy, pb);
3720
3721 /* For all the outer loops "loop_j", add "dj = 0". */
3722 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3723 {
3724 eq = omega_add_zero_eq (copy, omega_black);
3725 copy->eqs[eq].coef[j + 1] = 1;
3726 }
3727
3728 /* For "loop_i", add "0 <= di". */
3729 geq = omega_add_zero_geq (copy, omega_black);
3730 copy->geqs[geq].coef[i + 1] = 1;
3731
3732 /* Reduce the constraint system, and test that the current
3733 problem is feasible. */
3734 res = omega_simplify_problem (copy);
3735 if (res == omega_false
3736 || res == omega_unknown
3737 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3738 goto next_problem;
3739
3740 for (eq = 0; eq < copy->num_subs; eq++)
3741 if (copy->subs[eq].key == (int) i + 1)
3742 {
3743 dist = copy->subs[eq].coef[0];
3744 goto found_dist;
3745 }
3746
3747 if (dist == 0)
3748 {
3749 /* Reinitialize problem... */
3750 omega_copy_problem (copy, pb);
3751 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3752 {
3753 eq = omega_add_zero_eq (copy, omega_black);
3754 copy->eqs[eq].coef[j + 1] = 1;
3755 }
3756
3757 /* ..., but this time "di = 1". */
3758 eq = omega_add_zero_eq (copy, omega_black);
3759 copy->eqs[eq].coef[i + 1] = 1;
3760 copy->eqs[eq].coef[0] = -1;
3761
3762 res = omega_simplify_problem (copy);
3763 if (res == omega_false
3764 || res == omega_unknown
3765 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3766 goto next_problem;
3767
3768 for (eq = 0; eq < copy->num_subs; eq++)
3769 if (copy->subs[eq].key == (int) i + 1)
3770 {
3771 dist = copy->subs[eq].coef[0];
3772 goto found_dist;
3773 }
3774 }
3775
3776 found_dist:;
3777 /* Save the lexicographically positive distance vector. */
3778 if (dist >= 0)
3779 {
3780 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3781 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3782
3783 dist_v[i] = dist;
3784
3785 for (eq = 0; eq < copy->num_subs; eq++)
3786 if (copy->subs[eq].key > 0)
3787 {
3788 dist = copy->subs[eq].coef[0];
3789 dist_v[copy->subs[eq].key - 1] = dist;
3790 }
3791
3792 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3793 dir_v[j] = dir_from_dist (dist_v[j]);
3794
3795 save_dist_v (ddr, dist_v);
3796 save_dir_v (ddr, dir_v);
3797 }
3798
3799 next_problem:;
3800 omega_free_problem (copy);
3801 }
3802 }
3803
3804 /* This is called for each subscript of a tuple of data references:
3805 insert an equality for representing the conflicts. */
3806
3807 static bool
3808 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3809 struct data_dependence_relation *ddr,
3810 omega_pb pb, bool *maybe_dependent)
3811 {
3812 int eq;
3813 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3814 TREE_TYPE (access_fun_b));
3815 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3816 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3817 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3818 tree minus_one;
3819
3820 /* When the fun_a - fun_b is not constant, the dependence is not
3821 captured by the classic distance vector representation. */
3822 if (TREE_CODE (difference) != INTEGER_CST)
3823 return false;
3824
3825 /* ZIV test. */
3826 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3827 {
3828 /* There is no dependence. */
3829 *maybe_dependent = false;
3830 return true;
3831 }
3832
3833 minus_one = build_int_cst (type, -1);
3834 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3835
3836 eq = omega_add_zero_eq (pb, omega_black);
3837 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3838 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3839 /* There is probably a dependence, but the system of
3840 constraints cannot be built: answer "don't know". */
3841 return false;
3842
3843 /* GCD test. */
3844 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3845 && !int_divides_p (lambda_vector_gcd
3846 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3847 2 * DDR_NB_LOOPS (ddr)),
3848 pb->eqs[eq].coef[0]))
3849 {
3850 /* There is no dependence. */
3851 *maybe_dependent = false;
3852 return true;
3853 }
3854
3855 return true;
3856 }
3857
3858 /* Helper function, same as init_omega_for_ddr but specialized for
3859 data references A and B. */
3860
3861 static bool
3862 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3863 struct data_dependence_relation *ddr,
3864 omega_pb pb, bool *maybe_dependent)
3865 {
3866 unsigned i;
3867 int ineq;
3868 struct loop *loopi;
3869 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3870
3871 /* Insert an equality per subscript. */
3872 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3873 {
3874 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3875 ddr, pb, maybe_dependent))
3876 return false;
3877 else if (*maybe_dependent == false)
3878 {
3879 /* There is no dependence. */
3880 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3881 return true;
3882 }
3883 }
3884
3885 /* Insert inequalities: constraints corresponding to the iteration
3886 domain, i.e. the loops surrounding the references "loop_x" and
3887 the distance variables "dx". The layout of the OMEGA
3888 representation is as follows:
3889 - coef[0] is the constant
3890 - coef[1..nb_loops] are the protected variables that will not be
3891 removed by the solver: the "dx"
3892 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3893 */
3894 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3895 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3896 {
3897 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3898
3899 /* 0 <= loop_x */
3900 ineq = omega_add_zero_geq (pb, omega_black);
3901 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3902
3903 /* 0 <= loop_x + dx */
3904 ineq = omega_add_zero_geq (pb, omega_black);
3905 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3906 pb->geqs[ineq].coef[i + 1] = 1;
3907
3908 if (nbi != -1)
3909 {
3910 /* loop_x <= nb_iters */
3911 ineq = omega_add_zero_geq (pb, omega_black);
3912 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3913 pb->geqs[ineq].coef[0] = nbi;
3914
3915 /* loop_x + dx <= nb_iters */
3916 ineq = omega_add_zero_geq (pb, omega_black);
3917 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3918 pb->geqs[ineq].coef[i + 1] = -1;
3919 pb->geqs[ineq].coef[0] = nbi;
3920
3921 /* A step "dx" bigger than nb_iters is not feasible, so
3922 add "0 <= nb_iters + dx", */
3923 ineq = omega_add_zero_geq (pb, omega_black);
3924 pb->geqs[ineq].coef[i + 1] = 1;
3925 pb->geqs[ineq].coef[0] = nbi;
3926 /* and "dx <= nb_iters". */
3927 ineq = omega_add_zero_geq (pb, omega_black);
3928 pb->geqs[ineq].coef[i + 1] = -1;
3929 pb->geqs[ineq].coef[0] = nbi;
3930 }
3931 }
3932
3933 omega_extract_distance_vectors (pb, ddr);
3934
3935 return true;
3936 }
3937
3938 /* Sets up the Omega dependence problem for the data dependence
3939 relation DDR. Returns false when the constraint system cannot be
3940 built, ie. when the test answers "don't know". Returns true
3941 otherwise, and when independence has been proved (using one of the
3942 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3943 set MAYBE_DEPENDENT to true.
3944
3945 Example: for setting up the dependence system corresponding to the
3946 conflicting accesses
3947
3948 | loop_i
3949 | loop_j
3950 | A[i, i+1] = ...
3951 | ... A[2*j, 2*(i + j)]
3952 | endloop_j
3953 | endloop_i
3954
3955 the following constraints come from the iteration domain:
3956
3957 0 <= i <= Ni
3958 0 <= i + di <= Ni
3959 0 <= j <= Nj
3960 0 <= j + dj <= Nj
3961
3962 where di, dj are the distance variables. The constraints
3963 representing the conflicting elements are:
3964
3965 i = 2 * (j + dj)
3966 i + 1 = 2 * (i + di + j + dj)
3967
3968 For asking that the resulting distance vector (di, dj) be
3969 lexicographically positive, we insert the constraint "di >= 0". If
3970 "di = 0" in the solution, we fix that component to zero, and we
3971 look at the inner loops: we set a new problem where all the outer
3972 loop distances are zero, and fix this inner component to be
3973 positive. When one of the components is positive, we save that
3974 distance, and set a new problem where the distance on this loop is
3975 zero, searching for other distances in the inner loops. Here is
3976 the classic example that illustrates that we have to set for each
3977 inner loop a new problem:
3978
3979 | loop_1
3980 | loop_2
3981 | A[10]
3982 | endloop_2
3983 | endloop_1
3984
3985 we have to save two distances (1, 0) and (0, 1).
3986
3987 Given two array references, refA and refB, we have to set the
3988 dependence problem twice, refA vs. refB and refB vs. refA, and we
3989 cannot do a single test, as refB might occur before refA in the
3990 inner loops, and the contrary when considering outer loops: ex.
3991
3992 | loop_0
3993 | loop_1
3994 | loop_2
3995 | T[{1,+,1}_2][{1,+,1}_1] // refA
3996 | T[{2,+,1}_2][{0,+,1}_1] // refB
3997 | endloop_2
3998 | endloop_1
3999 | endloop_0
4000
4001 refB touches the elements in T before refA, and thus for the same
4002 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4003 but for successive loop_0 iterations, we have (1, -1, 1)
4004
4005 The Omega solver expects the distance variables ("di" in the
4006 previous example) to come first in the constraint system (as
4007 variables to be protected, or "safe" variables), the constraint
4008 system is built using the following layout:
4009
4010 "cst | distance vars | index vars".
4011 */
4012
4013 static bool
4014 init_omega_for_ddr (struct data_dependence_relation *ddr,
4015 bool *maybe_dependent)
4016 {
4017 omega_pb pb;
4018 bool res = false;
4019
4020 *maybe_dependent = true;
4021
4022 if (same_access_functions (ddr))
4023 {
4024 unsigned j;
4025 lambda_vector dir_v;
4026
4027 /* Save the 0 vector. */
4028 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4029 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4030 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4031 dir_v[j] = dir_equal;
4032 save_dir_v (ddr, dir_v);
4033
4034 /* Save the dependences carried by outer loops. */
4035 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4036 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4037 maybe_dependent);
4038 omega_free_problem (pb);
4039 return res;
4040 }
4041
4042 /* Omega expects the protected variables (those that have to be kept
4043 after elimination) to appear first in the constraint system.
4044 These variables are the distance variables. In the following
4045 initialization we declare NB_LOOPS safe variables, and the total
4046 number of variables for the constraint system is 2*NB_LOOPS. */
4047 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4048 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4049 maybe_dependent);
4050 omega_free_problem (pb);
4051
4052 /* Stop computation if not decidable, or no dependence. */
4053 if (res == false || *maybe_dependent == false)
4054 return res;
4055
4056 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4057 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4058 maybe_dependent);
4059 omega_free_problem (pb);
4060
4061 return res;
4062 }
4063
4064 /* Return true when DDR contains the same information as that stored
4065 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4066
4067 static bool
4068 ddr_consistent_p (FILE *file,
4069 struct data_dependence_relation *ddr,
4070 vec<lambda_vector> dist_vects,
4071 vec<lambda_vector> dir_vects)
4072 {
4073 unsigned int i, j;
4074
4075 /* If dump_file is set, output there. */
4076 if (dump_file && (dump_flags & TDF_DETAILS))
4077 file = dump_file;
4078
4079 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4080 {
4081 lambda_vector b_dist_v;
4082 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4083 dist_vects.length (),
4084 DDR_NUM_DIST_VECTS (ddr));
4085
4086 fprintf (file, "Banerjee dist vectors:\n");
4087 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4088 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4089
4090 fprintf (file, "Omega dist vectors:\n");
4091 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4092 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4093
4094 fprintf (file, "data dependence relation:\n");
4095 dump_data_dependence_relation (file, ddr);
4096
4097 fprintf (file, ")\n");
4098 return false;
4099 }
4100
4101 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4102 {
4103 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4104 dir_vects.length (),
4105 DDR_NUM_DIR_VECTS (ddr));
4106 return false;
4107 }
4108
4109 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4110 {
4111 lambda_vector a_dist_v;
4112 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4113
4114 /* Distance vectors are not ordered in the same way in the DDR
4115 and in the DIST_VECTS: search for a matching vector. */
4116 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4117 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4118 break;
4119
4120 if (j == dist_vects.length ())
4121 {
4122 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4123 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4124 fprintf (file, "not found in Omega dist vectors:\n");
4125 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4126 fprintf (file, "data dependence relation:\n");
4127 dump_data_dependence_relation (file, ddr);
4128 fprintf (file, ")\n");
4129 }
4130 }
4131
4132 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4133 {
4134 lambda_vector a_dir_v;
4135 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4136
4137 /* Direction vectors are not ordered in the same way in the DDR
4138 and in the DIR_VECTS: search for a matching vector. */
4139 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4140 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4141 break;
4142
4143 if (j == dist_vects.length ())
4144 {
4145 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4146 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4147 fprintf (file, "not found in Omega dir vectors:\n");
4148 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4149 fprintf (file, "data dependence relation:\n");
4150 dump_data_dependence_relation (file, ddr);
4151 fprintf (file, ")\n");
4152 }
4153 }
4154
4155 return true;
4156 }
4157
4158 /* This computes the affine dependence relation between A and B with
4159 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4160 independence between two accesses, while CHREC_DONT_KNOW is used
4161 for representing the unknown relation.
4162
4163 Note that it is possible to stop the computation of the dependence
4164 relation the first time we detect a CHREC_KNOWN element for a given
4165 subscript. */
4166
4167 void
4168 compute_affine_dependence (struct data_dependence_relation *ddr,
4169 struct loop *loop_nest)
4170 {
4171 struct data_reference *dra = DDR_A (ddr);
4172 struct data_reference *drb = DDR_B (ddr);
4173
4174 if (dump_file && (dump_flags & TDF_DETAILS))
4175 {
4176 fprintf (dump_file, "(compute_affine_dependence\n");
4177 fprintf (dump_file, " stmt_a: ");
4178 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4179 fprintf (dump_file, " stmt_b: ");
4180 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4181 }
4182
4183 /* Analyze only when the dependence relation is not yet known. */
4184 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4185 {
4186 dependence_stats.num_dependence_tests++;
4187
4188 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4189 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4190 {
4191 subscript_dependence_tester (ddr, loop_nest);
4192
4193 if (flag_check_data_deps)
4194 {
4195 /* Dump the dependences from the first algorithm. */
4196 if (dump_file && (dump_flags & TDF_DETAILS))
4197 {
4198 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4199 dump_data_dependence_relation (dump_file, ddr);
4200 }
4201
4202 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4203 {
4204 bool maybe_dependent;
4205 vec<lambda_vector> dir_vects, dist_vects;
4206
4207 /* Save the result of the first DD analyzer. */
4208 dist_vects = DDR_DIST_VECTS (ddr);
4209 dir_vects = DDR_DIR_VECTS (ddr);
4210
4211 /* Reset the information. */
4212 DDR_DIST_VECTS (ddr).create (0);
4213 DDR_DIR_VECTS (ddr).create (0);
4214
4215 /* Compute the same information using Omega. */
4216 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4217 goto csys_dont_know;
4218
4219 if (dump_file && (dump_flags & TDF_DETAILS))
4220 {
4221 fprintf (dump_file, "Omega Analyzer\n");
4222 dump_data_dependence_relation (dump_file, ddr);
4223 }
4224
4225 /* Check that we get the same information. */
4226 if (maybe_dependent)
4227 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4228 dir_vects));
4229 }
4230 }
4231 }
4232
4233 /* As a last case, if the dependence cannot be determined, or if
4234 the dependence is considered too difficult to determine, answer
4235 "don't know". */
4236 else
4237 {
4238 csys_dont_know:;
4239 dependence_stats.num_dependence_undetermined++;
4240
4241 if (dump_file && (dump_flags & TDF_DETAILS))
4242 {
4243 fprintf (dump_file, "Data ref a:\n");
4244 dump_data_reference (dump_file, dra);
4245 fprintf (dump_file, "Data ref b:\n");
4246 dump_data_reference (dump_file, drb);
4247 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4248 }
4249 finalize_ddr_dependent (ddr, chrec_dont_know);
4250 }
4251 }
4252
4253 if (dump_file && (dump_flags & TDF_DETAILS))
4254 {
4255 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4256 fprintf (dump_file, ") -> no dependence\n");
4257 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4258 fprintf (dump_file, ") -> dependence analysis failed\n");
4259 else
4260 fprintf (dump_file, ")\n");
4261 }
4262 }
4263
4264 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4265 the data references in DATAREFS, in the LOOP_NEST. When
4266 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4267 relations. Return true when successful, i.e. data references number
4268 is small enough to be handled. */
4269
4270 bool
4271 compute_all_dependences (vec<data_reference_p> datarefs,
4272 vec<ddr_p> *dependence_relations,
4273 vec<loop_p> loop_nest,
4274 bool compute_self_and_rr)
4275 {
4276 struct data_dependence_relation *ddr;
4277 struct data_reference *a, *b;
4278 unsigned int i, j;
4279
4280 if ((int) datarefs.length ()
4281 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4282 {
4283 struct data_dependence_relation *ddr;
4284
4285 /* Insert a single relation into dependence_relations:
4286 chrec_dont_know. */
4287 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4288 dependence_relations->safe_push (ddr);
4289 return false;
4290 }
4291
4292 FOR_EACH_VEC_ELT (datarefs, i, a)
4293 for (j = i + 1; datarefs.iterate (j, &b); j++)
4294 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4295 {
4296 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4297 dependence_relations->safe_push (ddr);
4298 if (loop_nest.exists ())
4299 compute_affine_dependence (ddr, loop_nest[0]);
4300 }
4301
4302 if (compute_self_and_rr)
4303 FOR_EACH_VEC_ELT (datarefs, i, a)
4304 {
4305 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4306 dependence_relations->safe_push (ddr);
4307 if (loop_nest.exists ())
4308 compute_affine_dependence (ddr, loop_nest[0]);
4309 }
4310
4311 return true;
4312 }
4313
4314 /* Describes a location of a memory reference. */
4315
4316 typedef struct data_ref_loc_d
4317 {
4318 /* Position of the memory reference. */
4319 tree *pos;
4320
4321 /* True if the memory reference is read. */
4322 bool is_read;
4323 } data_ref_loc;
4324
4325
4326 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4327 true if STMT clobbers memory, false otherwise. */
4328
4329 static bool
4330 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4331 {
4332 bool clobbers_memory = false;
4333 data_ref_loc ref;
4334 tree *op0, *op1;
4335 enum gimple_code stmt_code = gimple_code (stmt);
4336
4337 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4338 As we cannot model data-references to not spelled out
4339 accesses give up if they may occur. */
4340 if (stmt_code == GIMPLE_CALL
4341 && !(gimple_call_flags (stmt) & ECF_CONST))
4342 {
4343 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4344 if (gimple_call_internal_p (stmt)
4345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
4346 {
4347 struct loop *loop = gimple_bb (stmt)->loop_father;
4348 tree uid = gimple_call_arg (stmt, 0);
4349 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4350 if (loop == NULL
4351 || loop->simduid != SSA_NAME_VAR (uid))
4352 clobbers_memory = true;
4353 }
4354 else
4355 clobbers_memory = true;
4356 }
4357 else if (stmt_code == GIMPLE_ASM
4358 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))
4359 clobbers_memory = true;
4360
4361 if (!gimple_vuse (stmt))
4362 return clobbers_memory;
4363
4364 if (stmt_code == GIMPLE_ASSIGN)
4365 {
4366 tree base;
4367 op0 = gimple_assign_lhs_ptr (stmt);
4368 op1 = gimple_assign_rhs1_ptr (stmt);
4369
4370 if (DECL_P (*op1)
4371 || (REFERENCE_CLASS_P (*op1)
4372 && (base = get_base_address (*op1))
4373 && TREE_CODE (base) != SSA_NAME))
4374 {
4375 ref.pos = op1;
4376 ref.is_read = true;
4377 references->safe_push (ref);
4378 }
4379 }
4380 else if (stmt_code == GIMPLE_CALL)
4381 {
4382 unsigned i, n;
4383
4384 op0 = gimple_call_lhs_ptr (stmt);
4385 n = gimple_call_num_args (stmt);
4386 for (i = 0; i < n; i++)
4387 {
4388 op1 = gimple_call_arg_ptr (stmt, i);
4389
4390 if (DECL_P (*op1)
4391 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4392 {
4393 ref.pos = op1;
4394 ref.is_read = true;
4395 references->safe_push (ref);
4396 }
4397 }
4398 }
4399 else
4400 return clobbers_memory;
4401
4402 if (*op0
4403 && (DECL_P (*op0)
4404 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4405 {
4406 ref.pos = op0;
4407 ref.is_read = false;
4408 references->safe_push (ref);
4409 }
4410 return clobbers_memory;
4411 }
4412
4413 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4414 reference, returns false, otherwise returns true. NEST is the outermost
4415 loop of the loop nest in which the references should be analyzed. */
4416
4417 bool
4418 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4419 vec<data_reference_p> *datarefs)
4420 {
4421 unsigned i;
4422 stack_vec<data_ref_loc, 2> references;
4423 data_ref_loc *ref;
4424 bool ret = true;
4425 data_reference_p dr;
4426
4427 if (get_references_in_stmt (stmt, &references))
4428 return false;
4429
4430 FOR_EACH_VEC_ELT (references, i, ref)
4431 {
4432 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4433 *ref->pos, stmt, ref->is_read);
4434 gcc_assert (dr != NULL);
4435 datarefs->safe_push (dr);
4436 }
4437 references.release ();
4438 return ret;
4439 }
4440
4441 /* Stores the data references in STMT to DATAREFS. If there is an
4442 unanalyzable reference, returns false, otherwise returns true.
4443 NEST is the outermost loop of the loop nest in which the references
4444 should be instantiated, LOOP is the loop in which the references
4445 should be analyzed. */
4446
4447 bool
4448 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4449 vec<data_reference_p> *datarefs)
4450 {
4451 unsigned i;
4452 stack_vec<data_ref_loc, 2> references;
4453 data_ref_loc *ref;
4454 bool ret = true;
4455 data_reference_p dr;
4456
4457 if (get_references_in_stmt (stmt, &references))
4458 return false;
4459
4460 FOR_EACH_VEC_ELT (references, i, ref)
4461 {
4462 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4463 gcc_assert (dr != NULL);
4464 datarefs->safe_push (dr);
4465 }
4466
4467 references.release ();
4468 return ret;
4469 }
4470
4471 /* Search the data references in LOOP, and record the information into
4472 DATAREFS. Returns chrec_dont_know when failing to analyze a
4473 difficult case, returns NULL_TREE otherwise. */
4474
4475 tree
4476 find_data_references_in_bb (struct loop *loop, basic_block bb,
4477 vec<data_reference_p> *datarefs)
4478 {
4479 gimple_stmt_iterator bsi;
4480
4481 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4482 {
4483 gimple stmt = gsi_stmt (bsi);
4484
4485 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4486 {
4487 struct data_reference *res;
4488 res = XCNEW (struct data_reference);
4489 datarefs->safe_push (res);
4490
4491 return chrec_dont_know;
4492 }
4493 }
4494
4495 return NULL_TREE;
4496 }
4497
4498 /* Search the data references in LOOP, and record the information into
4499 DATAREFS. Returns chrec_dont_know when failing to analyze a
4500 difficult case, returns NULL_TREE otherwise.
4501
4502 TODO: This function should be made smarter so that it can handle address
4503 arithmetic as if they were array accesses, etc. */
4504
4505 tree
4506 find_data_references_in_loop (struct loop *loop,
4507 vec<data_reference_p> *datarefs)
4508 {
4509 basic_block bb, *bbs;
4510 unsigned int i;
4511
4512 bbs = get_loop_body_in_dom_order (loop);
4513
4514 for (i = 0; i < loop->num_nodes; i++)
4515 {
4516 bb = bbs[i];
4517
4518 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4519 {
4520 free (bbs);
4521 return chrec_dont_know;
4522 }
4523 }
4524 free (bbs);
4525
4526 return NULL_TREE;
4527 }
4528
4529 /* Recursive helper function. */
4530
4531 static bool
4532 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4533 {
4534 /* Inner loops of the nest should not contain siblings. Example:
4535 when there are two consecutive loops,
4536
4537 | loop_0
4538 | loop_1
4539 | A[{0, +, 1}_1]
4540 | endloop_1
4541 | loop_2
4542 | A[{0, +, 1}_2]
4543 | endloop_2
4544 | endloop_0
4545
4546 the dependence relation cannot be captured by the distance
4547 abstraction. */
4548 if (loop->next)
4549 return false;
4550
4551 loop_nest->safe_push (loop);
4552 if (loop->inner)
4553 return find_loop_nest_1 (loop->inner, loop_nest);
4554 return true;
4555 }
4556
4557 /* Return false when the LOOP is not well nested. Otherwise return
4558 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4559 contain the loops from the outermost to the innermost, as they will
4560 appear in the classic distance vector. */
4561
4562 bool
4563 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4564 {
4565 loop_nest->safe_push (loop);
4566 if (loop->inner)
4567 return find_loop_nest_1 (loop->inner, loop_nest);
4568 return true;
4569 }
4570
4571 /* Returns true when the data dependences have been computed, false otherwise.
4572 Given a loop nest LOOP, the following vectors are returned:
4573 DATAREFS is initialized to all the array elements contained in this loop,
4574 DEPENDENCE_RELATIONS contains the relations between the data references.
4575 Compute read-read and self relations if
4576 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4577
4578 bool
4579 compute_data_dependences_for_loop (struct loop *loop,
4580 bool compute_self_and_read_read_dependences,
4581 vec<loop_p> *loop_nest,
4582 vec<data_reference_p> *datarefs,
4583 vec<ddr_p> *dependence_relations)
4584 {
4585 bool res = true;
4586
4587 memset (&dependence_stats, 0, sizeof (dependence_stats));
4588
4589 /* If the loop nest is not well formed, or one of the data references
4590 is not computable, give up without spending time to compute other
4591 dependences. */
4592 if (!loop
4593 || !find_loop_nest (loop, loop_nest)
4594 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4595 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4596 compute_self_and_read_read_dependences))
4597 res = false;
4598
4599 if (dump_file && (dump_flags & TDF_STATS))
4600 {
4601 fprintf (dump_file, "Dependence tester statistics:\n");
4602
4603 fprintf (dump_file, "Number of dependence tests: %d\n",
4604 dependence_stats.num_dependence_tests);
4605 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4606 dependence_stats.num_dependence_dependent);
4607 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4608 dependence_stats.num_dependence_independent);
4609 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4610 dependence_stats.num_dependence_undetermined);
4611
4612 fprintf (dump_file, "Number of subscript tests: %d\n",
4613 dependence_stats.num_subscript_tests);
4614 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4615 dependence_stats.num_subscript_undetermined);
4616 fprintf (dump_file, "Number of same subscript function: %d\n",
4617 dependence_stats.num_same_subscript_function);
4618
4619 fprintf (dump_file, "Number of ziv tests: %d\n",
4620 dependence_stats.num_ziv);
4621 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4622 dependence_stats.num_ziv_dependent);
4623 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4624 dependence_stats.num_ziv_independent);
4625 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4626 dependence_stats.num_ziv_unimplemented);
4627
4628 fprintf (dump_file, "Number of siv tests: %d\n",
4629 dependence_stats.num_siv);
4630 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4631 dependence_stats.num_siv_dependent);
4632 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4633 dependence_stats.num_siv_independent);
4634 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4635 dependence_stats.num_siv_unimplemented);
4636
4637 fprintf (dump_file, "Number of miv tests: %d\n",
4638 dependence_stats.num_miv);
4639 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4640 dependence_stats.num_miv_dependent);
4641 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4642 dependence_stats.num_miv_independent);
4643 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4644 dependence_stats.num_miv_unimplemented);
4645 }
4646
4647 return res;
4648 }
4649
4650 /* Returns true when the data dependences for the basic block BB have been
4651 computed, false otherwise.
4652 DATAREFS is initialized to all the array elements contained in this basic
4653 block, DEPENDENCE_RELATIONS contains the relations between the data
4654 references. Compute read-read and self relations if
4655 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4656 bool
4657 compute_data_dependences_for_bb (basic_block bb,
4658 bool compute_self_and_read_read_dependences,
4659 vec<data_reference_p> *datarefs,
4660 vec<ddr_p> *dependence_relations)
4661 {
4662 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4663 return false;
4664
4665 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4666 compute_self_and_read_read_dependences);
4667 }
4668
4669 /* Entry point (for testing only). Analyze all the data references
4670 and the dependence relations in LOOP.
4671
4672 The data references are computed first.
4673
4674 A relation on these nodes is represented by a complete graph. Some
4675 of the relations could be of no interest, thus the relations can be
4676 computed on demand.
4677
4678 In the following function we compute all the relations. This is
4679 just a first implementation that is here for:
4680 - for showing how to ask for the dependence relations,
4681 - for the debugging the whole dependence graph,
4682 - for the dejagnu testcases and maintenance.
4683
4684 It is possible to ask only for a part of the graph, avoiding to
4685 compute the whole dependence graph. The computed dependences are
4686 stored in a knowledge base (KB) such that later queries don't
4687 recompute the same information. The implementation of this KB is
4688 transparent to the optimizer, and thus the KB can be changed with a
4689 more efficient implementation, or the KB could be disabled. */
4690 static void
4691 analyze_all_data_dependences (struct loop *loop)
4692 {
4693 unsigned int i;
4694 int nb_data_refs = 10;
4695 vec<data_reference_p> datarefs;
4696 datarefs.create (nb_data_refs);
4697 vec<ddr_p> dependence_relations;
4698 dependence_relations.create (nb_data_refs * nb_data_refs);
4699 vec<loop_p> loop_nest;
4700 loop_nest.create (3);
4701
4702 /* Compute DDs on the whole function. */
4703 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4704 &dependence_relations);
4705
4706 if (dump_file)
4707 {
4708 dump_data_dependence_relations (dump_file, dependence_relations);
4709 fprintf (dump_file, "\n\n");
4710
4711 if (dump_flags & TDF_DETAILS)
4712 dump_dist_dir_vectors (dump_file, dependence_relations);
4713
4714 if (dump_flags & TDF_STATS)
4715 {
4716 unsigned nb_top_relations = 0;
4717 unsigned nb_bot_relations = 0;
4718 unsigned nb_chrec_relations = 0;
4719 struct data_dependence_relation *ddr;
4720
4721 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4722 {
4723 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4724 nb_top_relations++;
4725
4726 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4727 nb_bot_relations++;
4728
4729 else
4730 nb_chrec_relations++;
4731 }
4732
4733 gather_stats_on_scev_database ();
4734 }
4735 }
4736
4737 loop_nest.release ();
4738 free_dependence_relations (dependence_relations);
4739 free_data_refs (datarefs);
4740 }
4741
4742 /* Computes all the data dependences and check that the results of
4743 several analyzers are the same. */
4744
4745 void
4746 tree_check_data_deps (void)
4747 {
4748 struct loop *loop_nest;
4749
4750 FOR_EACH_LOOP (loop_nest, 0)
4751 analyze_all_data_dependences (loop_nest);
4752 }
4753
4754 /* Free the memory used by a data dependence relation DDR. */
4755
4756 void
4757 free_dependence_relation (struct data_dependence_relation *ddr)
4758 {
4759 if (ddr == NULL)
4760 return;
4761
4762 if (DDR_SUBSCRIPTS (ddr).exists ())
4763 free_subscripts (DDR_SUBSCRIPTS (ddr));
4764 DDR_DIST_VECTS (ddr).release ();
4765 DDR_DIR_VECTS (ddr).release ();
4766
4767 free (ddr);
4768 }
4769
4770 /* Free the memory used by the data dependence relations from
4771 DEPENDENCE_RELATIONS. */
4772
4773 void
4774 free_dependence_relations (vec<ddr_p> dependence_relations)
4775 {
4776 unsigned int i;
4777 struct data_dependence_relation *ddr;
4778
4779 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4780 if (ddr)
4781 free_dependence_relation (ddr);
4782
4783 dependence_relations.release ();
4784 }
4785
4786 /* Free the memory used by the data references from DATAREFS. */
4787
4788 void
4789 free_data_refs (vec<data_reference_p> datarefs)
4790 {
4791 unsigned int i;
4792 struct data_reference *dr;
4793
4794 FOR_EACH_VEC_ELT (datarefs, i, dr)
4795 free_data_ref (dr);
4796 datarefs.release ();
4797 }