]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
* diagnostic.c: Don't include tm.h, tree.h, tm_p.h, langhooks.h or
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "flags.h"
83 #include "tree.h"
84
85 /* These RTL headers are needed for basic-block.h. */
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-pretty-print.h"
89 #include "gimple-pretty-print.h"
90 #include "tree-flow.h"
91 #include "tree-dump.h"
92 #include "timevar.h"
93 #include "cfgloop.h"
94 #include "tree-data-ref.h"
95 #include "tree-scalar-evolution.h"
96 #include "tree-pass.h"
97 #include "langhooks.h"
98
99 static struct datadep_stats
100 {
101 int num_dependence_tests;
102 int num_dependence_dependent;
103 int num_dependence_independent;
104 int num_dependence_undetermined;
105
106 int num_subscript_tests;
107 int num_subscript_undetermined;
108 int num_same_subscript_function;
109
110 int num_ziv;
111 int num_ziv_independent;
112 int num_ziv_dependent;
113 int num_ziv_unimplemented;
114
115 int num_siv;
116 int num_siv_independent;
117 int num_siv_dependent;
118 int num_siv_unimplemented;
119
120 int num_miv;
121 int num_miv_independent;
122 int num_miv_dependent;
123 int num_miv_unimplemented;
124 } dependence_stats;
125
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 struct data_reference *,
128 struct data_reference *,
129 struct loop *);
130 /* Returns true iff A divides B. */
131
132 static inline bool
133 tree_fold_divides_p (const_tree a, const_tree b)
134 {
135 gcc_assert (TREE_CODE (a) == INTEGER_CST);
136 gcc_assert (TREE_CODE (b) == INTEGER_CST);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 }
139
140 /* Returns true iff A divides B. */
141
142 static inline bool
143 int_divides_p (int a, int b)
144 {
145 return ((b % a) == 0);
146 }
147
148 \f
149
150 /* Dump into FILE all the data references from DATAREFS. */
151
152 void
153 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
154 {
155 unsigned int i;
156 struct data_reference *dr;
157
158 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
159 dump_data_reference (file, dr);
160 }
161
162 /* Dump into STDERR all the data references from DATAREFS. */
163
164 void
165 debug_data_references (VEC (data_reference_p, heap) *datarefs)
166 {
167 dump_data_references (stderr, datarefs);
168 }
169
170 /* Dump to STDERR all the dependence relations from DDRS. */
171
172 void
173 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
174 {
175 dump_data_dependence_relations (stderr, ddrs);
176 }
177
178 /* Dump into FILE all the dependence relations from DDRS. */
179
180 void
181 dump_data_dependence_relations (FILE *file,
182 VEC (ddr_p, heap) *ddrs)
183 {
184 unsigned int i;
185 struct data_dependence_relation *ddr;
186
187 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
188 dump_data_dependence_relation (file, ddr);
189 }
190
191 /* Print to STDERR the data_reference DR. */
192
193 void
194 debug_data_reference (struct data_reference *dr)
195 {
196 dump_data_reference (stderr, dr);
197 }
198
199 /* Dump function for a DATA_REFERENCE structure. */
200
201 void
202 dump_data_reference (FILE *outf,
203 struct data_reference *dr)
204 {
205 unsigned int i;
206
207 fprintf (outf, "#(Data Ref: \n# stmt: ");
208 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
209 fprintf (outf, "# ref: ");
210 print_generic_stmt (outf, DR_REF (dr), 0);
211 fprintf (outf, "# base_object: ");
212 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
213
214 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
215 {
216 fprintf (outf, "# Access function %d: ", i);
217 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
218 }
219 fprintf (outf, "#)\n");
220 }
221
222 /* Dumps the affine function described by FN to the file OUTF. */
223
224 static void
225 dump_affine_function (FILE *outf, affine_fn fn)
226 {
227 unsigned i;
228 tree coef;
229
230 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
231 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
232 {
233 fprintf (outf, " + ");
234 print_generic_expr (outf, coef, TDF_SLIM);
235 fprintf (outf, " * x_%u", i);
236 }
237 }
238
239 /* Dumps the conflict function CF to the file OUTF. */
240
241 static void
242 dump_conflict_function (FILE *outf, conflict_function *cf)
243 {
244 unsigned i;
245
246 if (cf->n == NO_DEPENDENCE)
247 fprintf (outf, "no dependence\n");
248 else if (cf->n == NOT_KNOWN)
249 fprintf (outf, "not known\n");
250 else
251 {
252 for (i = 0; i < cf->n; i++)
253 {
254 fprintf (outf, "[");
255 dump_affine_function (outf, cf->fns[i]);
256 fprintf (outf, "]\n");
257 }
258 }
259 }
260
261 /* Dump function for a SUBSCRIPT structure. */
262
263 void
264 dump_subscript (FILE *outf, struct subscript *subscript)
265 {
266 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
267
268 fprintf (outf, "\n (subscript \n");
269 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
270 dump_conflict_function (outf, cf);
271 if (CF_NONTRIVIAL_P (cf))
272 {
273 tree last_iteration = SUB_LAST_CONFLICT (subscript);
274 fprintf (outf, " last_conflict: ");
275 print_generic_stmt (outf, last_iteration, 0);
276 }
277
278 cf = SUB_CONFLICTS_IN_B (subscript);
279 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
280 dump_conflict_function (outf, cf);
281 if (CF_NONTRIVIAL_P (cf))
282 {
283 tree last_iteration = SUB_LAST_CONFLICT (subscript);
284 fprintf (outf, " last_conflict: ");
285 print_generic_stmt (outf, last_iteration, 0);
286 }
287
288 fprintf (outf, " (Subscript distance: ");
289 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
290 fprintf (outf, " )\n");
291 fprintf (outf, " )\n");
292 }
293
294 /* Print the classic direction vector DIRV to OUTF. */
295
296 void
297 print_direction_vector (FILE *outf,
298 lambda_vector dirv,
299 int length)
300 {
301 int eq;
302
303 for (eq = 0; eq < length; eq++)
304 {
305 enum data_dependence_direction dir = ((enum data_dependence_direction)
306 dirv[eq]);
307
308 switch (dir)
309 {
310 case dir_positive:
311 fprintf (outf, " +");
312 break;
313 case dir_negative:
314 fprintf (outf, " -");
315 break;
316 case dir_equal:
317 fprintf (outf, " =");
318 break;
319 case dir_positive_or_equal:
320 fprintf (outf, " +=");
321 break;
322 case dir_positive_or_negative:
323 fprintf (outf, " +-");
324 break;
325 case dir_negative_or_equal:
326 fprintf (outf, " -=");
327 break;
328 case dir_star:
329 fprintf (outf, " *");
330 break;
331 default:
332 fprintf (outf, "indep");
333 break;
334 }
335 }
336 fprintf (outf, "\n");
337 }
338
339 /* Print a vector of direction vectors. */
340
341 void
342 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
343 int length)
344 {
345 unsigned j;
346 lambda_vector v;
347
348 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
349 print_direction_vector (outf, v, length);
350 }
351
352 /* Print a vector of distance vectors. */
353
354 void
355 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
356 int length)
357 {
358 unsigned j;
359 lambda_vector v;
360
361 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
362 print_lambda_vector (outf, v, length);
363 }
364
365 /* Debug version. */
366
367 void
368 debug_data_dependence_relation (struct data_dependence_relation *ddr)
369 {
370 dump_data_dependence_relation (stderr, ddr);
371 }
372
373 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
374
375 void
376 dump_data_dependence_relation (FILE *outf,
377 struct data_dependence_relation *ddr)
378 {
379 struct data_reference *dra, *drb;
380
381 fprintf (outf, "(Data Dep: \n");
382
383 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
384 {
385 if (ddr)
386 {
387 dra = DDR_A (ddr);
388 drb = DDR_B (ddr);
389 if (dra)
390 dump_data_reference (outf, dra);
391 else
392 fprintf (outf, " (nil)\n");
393 if (drb)
394 dump_data_reference (outf, drb);
395 else
396 fprintf (outf, " (nil)\n");
397 }
398 fprintf (outf, " (don't know)\n)\n");
399 return;
400 }
401
402 dra = DDR_A (ddr);
403 drb = DDR_B (ddr);
404 dump_data_reference (outf, dra);
405 dump_data_reference (outf, drb);
406
407 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
408 fprintf (outf, " (no dependence)\n");
409
410 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
411 {
412 unsigned int i;
413 struct loop *loopi;
414
415 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
416 {
417 fprintf (outf, " access_fn_A: ");
418 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
419 fprintf (outf, " access_fn_B: ");
420 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
421 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
422 }
423
424 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
425 fprintf (outf, " loop nest: (");
426 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
427 fprintf (outf, "%d ", loopi->num);
428 fprintf (outf, ")\n");
429
430 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
431 {
432 fprintf (outf, " distance_vector: ");
433 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
434 DDR_NB_LOOPS (ddr));
435 }
436
437 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
438 {
439 fprintf (outf, " direction_vector: ");
440 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
441 DDR_NB_LOOPS (ddr));
442 }
443 }
444
445 fprintf (outf, ")\n");
446 }
447
448 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
449
450 void
451 dump_data_dependence_direction (FILE *file,
452 enum data_dependence_direction dir)
453 {
454 switch (dir)
455 {
456 case dir_positive:
457 fprintf (file, "+");
458 break;
459
460 case dir_negative:
461 fprintf (file, "-");
462 break;
463
464 case dir_equal:
465 fprintf (file, "=");
466 break;
467
468 case dir_positive_or_negative:
469 fprintf (file, "+-");
470 break;
471
472 case dir_positive_or_equal:
473 fprintf (file, "+=");
474 break;
475
476 case dir_negative_or_equal:
477 fprintf (file, "-=");
478 break;
479
480 case dir_star:
481 fprintf (file, "*");
482 break;
483
484 default:
485 break;
486 }
487 }
488
489 /* Dumps the distance and direction vectors in FILE. DDRS contains
490 the dependence relations, and VECT_SIZE is the size of the
491 dependence vectors, or in other words the number of loops in the
492 considered nest. */
493
494 void
495 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
496 {
497 unsigned int i, j;
498 struct data_dependence_relation *ddr;
499 lambda_vector v;
500
501 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
502 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
503 {
504 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
505 {
506 fprintf (file, "DISTANCE_V (");
507 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
508 fprintf (file, ")\n");
509 }
510
511 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
512 {
513 fprintf (file, "DIRECTION_V (");
514 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
515 fprintf (file, ")\n");
516 }
517 }
518
519 fprintf (file, "\n\n");
520 }
521
522 /* Dumps the data dependence relations DDRS in FILE. */
523
524 void
525 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
526 {
527 unsigned int i;
528 struct data_dependence_relation *ddr;
529
530 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
531 dump_data_dependence_relation (file, ddr);
532
533 fprintf (file, "\n\n");
534 }
535
536 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
537 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
538 constant of type ssizetype, and returns true. If we cannot do this
539 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
540 is returned. */
541
542 static bool
543 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
544 tree *var, tree *off)
545 {
546 tree var0, var1;
547 tree off0, off1;
548 enum tree_code ocode = code;
549
550 *var = NULL_TREE;
551 *off = NULL_TREE;
552
553 switch (code)
554 {
555 case INTEGER_CST:
556 *var = build_int_cst (type, 0);
557 *off = fold_convert (ssizetype, op0);
558 return true;
559
560 case POINTER_PLUS_EXPR:
561 ocode = PLUS_EXPR;
562 /* FALLTHROUGH */
563 case PLUS_EXPR:
564 case MINUS_EXPR:
565 split_constant_offset (op0, &var0, &off0);
566 split_constant_offset (op1, &var1, &off1);
567 *var = fold_build2 (code, type, var0, var1);
568 *off = size_binop (ocode, off0, off1);
569 return true;
570
571 case MULT_EXPR:
572 if (TREE_CODE (op1) != INTEGER_CST)
573 return false;
574
575 split_constant_offset (op0, &var0, &off0);
576 *var = fold_build2 (MULT_EXPR, type, var0, op1);
577 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
578 return true;
579
580 case ADDR_EXPR:
581 {
582 tree base, poffset;
583 HOST_WIDE_INT pbitsize, pbitpos;
584 enum machine_mode pmode;
585 int punsignedp, pvolatilep;
586
587 op0 = TREE_OPERAND (op0, 0);
588 if (!handled_component_p (op0))
589 return false;
590
591 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
592 &pmode, &punsignedp, &pvolatilep, false);
593
594 if (pbitpos % BITS_PER_UNIT != 0)
595 return false;
596 base = build_fold_addr_expr (base);
597 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
598
599 if (poffset)
600 {
601 split_constant_offset (poffset, &poffset, &off1);
602 off0 = size_binop (PLUS_EXPR, off0, off1);
603 if (POINTER_TYPE_P (TREE_TYPE (base)))
604 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
605 base, fold_convert (sizetype, poffset));
606 else
607 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
608 fold_convert (TREE_TYPE (base), poffset));
609 }
610
611 var0 = fold_convert (type, base);
612
613 /* If variable length types are involved, punt, otherwise casts
614 might be converted into ARRAY_REFs in gimplify_conversion.
615 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
616 possibly no longer appears in current GIMPLE, might resurface.
617 This perhaps could run
618 if (CONVERT_EXPR_P (var0))
619 {
620 gimplify_conversion (&var0);
621 // Attempt to fill in any within var0 found ARRAY_REF's
622 // element size from corresponding op embedded ARRAY_REF,
623 // if unsuccessful, just punt.
624 } */
625 while (POINTER_TYPE_P (type))
626 type = TREE_TYPE (type);
627 if (int_size_in_bytes (type) < 0)
628 return false;
629
630 *var = var0;
631 *off = off0;
632 return true;
633 }
634
635 case SSA_NAME:
636 {
637 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
638 enum tree_code subcode;
639
640 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
641 return false;
642
643 var0 = gimple_assign_rhs1 (def_stmt);
644 subcode = gimple_assign_rhs_code (def_stmt);
645 var1 = gimple_assign_rhs2 (def_stmt);
646
647 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
648 }
649 CASE_CONVERT:
650 {
651 /* We must not introduce undefined overflow, and we must not change the value.
652 Hence we're okay if the inner type doesn't overflow to start with
653 (pointer or signed), the outer type also is an integer or pointer
654 and the outer precision is at least as large as the inner. */
655 tree itype = TREE_TYPE (op0);
656 if ((POINTER_TYPE_P (itype)
657 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
658 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
659 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
660 {
661 split_constant_offset (op0, &var0, off);
662 *var = fold_convert (type, var0);
663 return true;
664 }
665 return false;
666 }
667
668 default:
669 return false;
670 }
671 }
672
673 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
674 will be ssizetype. */
675
676 void
677 split_constant_offset (tree exp, tree *var, tree *off)
678 {
679 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
680 enum tree_code code;
681
682 *var = exp;
683 *off = ssize_int (0);
684 STRIP_NOPS (exp);
685
686 if (automatically_generated_chrec_p (exp))
687 return;
688
689 otype = TREE_TYPE (exp);
690 code = TREE_CODE (exp);
691 extract_ops_from_tree (exp, &code, &op0, &op1);
692 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
693 {
694 *var = fold_convert (type, e);
695 *off = o;
696 }
697 }
698
699 /* Returns the address ADDR of an object in a canonical shape (without nop
700 casts, and with type of pointer to the object). */
701
702 static tree
703 canonicalize_base_object_address (tree addr)
704 {
705 tree orig = addr;
706
707 STRIP_NOPS (addr);
708
709 /* The base address may be obtained by casting from integer, in that case
710 keep the cast. */
711 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
712 return orig;
713
714 if (TREE_CODE (addr) != ADDR_EXPR)
715 return addr;
716
717 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
718 }
719
720 /* Analyzes the behavior of the memory reference DR in the innermost loop or
721 basic block that contains it. Returns true if analysis succeed or false
722 otherwise. */
723
724 bool
725 dr_analyze_innermost (struct data_reference *dr)
726 {
727 gimple stmt = DR_STMT (dr);
728 struct loop *loop = loop_containing_stmt (stmt);
729 tree ref = DR_REF (dr);
730 HOST_WIDE_INT pbitsize, pbitpos;
731 tree base, poffset;
732 enum machine_mode pmode;
733 int punsignedp, pvolatilep;
734 affine_iv base_iv, offset_iv;
735 tree init, dinit, step;
736 bool in_loop = (loop && loop->num);
737
738 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "analyze_innermost: ");
740
741 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
742 &pmode, &punsignedp, &pvolatilep, false);
743 gcc_assert (base != NULL_TREE);
744
745 if (pbitpos % BITS_PER_UNIT != 0)
746 {
747 if (dump_file && (dump_flags & TDF_DETAILS))
748 fprintf (dump_file, "failed: bit offset alignment.\n");
749 return false;
750 }
751
752 base = build_fold_addr_expr (base);
753 if (in_loop)
754 {
755 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
756 false))
757 {
758 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "failed: evolution of base is not affine.\n");
760 return false;
761 }
762 }
763 else
764 {
765 base_iv.base = base;
766 base_iv.step = ssize_int (0);
767 base_iv.no_overflow = true;
768 }
769
770 if (!poffset)
771 {
772 offset_iv.base = ssize_int (0);
773 offset_iv.step = ssize_int (0);
774 }
775 else
776 {
777 if (!in_loop)
778 {
779 offset_iv.base = poffset;
780 offset_iv.step = ssize_int (0);
781 }
782 else if (!simple_iv (loop, loop_containing_stmt (stmt),
783 poffset, &offset_iv, false))
784 {
785 if (dump_file && (dump_flags & TDF_DETAILS))
786 fprintf (dump_file, "failed: evolution of offset is not"
787 " affine.\n");
788 return false;
789 }
790 }
791
792 init = ssize_int (pbitpos / BITS_PER_UNIT);
793 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
794 init = size_binop (PLUS_EXPR, init, dinit);
795 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
796 init = size_binop (PLUS_EXPR, init, dinit);
797
798 step = size_binop (PLUS_EXPR,
799 fold_convert (ssizetype, base_iv.step),
800 fold_convert (ssizetype, offset_iv.step));
801
802 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
803
804 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
805 DR_INIT (dr) = init;
806 DR_STEP (dr) = step;
807
808 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
809
810 if (dump_file && (dump_flags & TDF_DETAILS))
811 fprintf (dump_file, "success.\n");
812
813 return true;
814 }
815
816 /* Determines the base object and the list of indices of memory reference
817 DR, analyzed in loop nest NEST. */
818
819 static void
820 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
821 {
822 gimple stmt = DR_STMT (dr);
823 struct loop *loop = loop_containing_stmt (stmt);
824 VEC (tree, heap) *access_fns = NULL;
825 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
826 tree base, off, access_fn = NULL_TREE;
827 basic_block before_loop = NULL;
828
829 if (nest)
830 before_loop = block_before_loop (nest);
831
832 while (handled_component_p (aref))
833 {
834 if (TREE_CODE (aref) == ARRAY_REF)
835 {
836 op = TREE_OPERAND (aref, 1);
837 if (nest)
838 {
839 access_fn = analyze_scalar_evolution (loop, op);
840 access_fn = instantiate_scev (before_loop, loop, access_fn);
841 VEC_safe_push (tree, heap, access_fns, access_fn);
842 }
843
844 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
845 }
846
847 aref = TREE_OPERAND (aref, 0);
848 }
849
850 if (nest && INDIRECT_REF_P (aref))
851 {
852 op = TREE_OPERAND (aref, 0);
853 access_fn = analyze_scalar_evolution (loop, op);
854 access_fn = instantiate_scev (before_loop, loop, access_fn);
855 base = initial_condition (access_fn);
856 split_constant_offset (base, &base, &off);
857 access_fn = chrec_replace_initial_condition (access_fn,
858 fold_convert (TREE_TYPE (base), off));
859
860 TREE_OPERAND (aref, 0) = base;
861 VEC_safe_push (tree, heap, access_fns, access_fn);
862 }
863
864 DR_BASE_OBJECT (dr) = ref;
865 DR_ACCESS_FNS (dr) = access_fns;
866 }
867
868 /* Extracts the alias analysis information from the memory reference DR. */
869
870 static void
871 dr_analyze_alias (struct data_reference *dr)
872 {
873 tree ref = DR_REF (dr);
874 tree base = get_base_address (ref), addr;
875
876 if (INDIRECT_REF_P (base))
877 {
878 addr = TREE_OPERAND (base, 0);
879 if (TREE_CODE (addr) == SSA_NAME)
880 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
881 }
882 }
883
884 /* Returns true if the address of DR is invariant. */
885
886 static bool
887 dr_address_invariant_p (struct data_reference *dr)
888 {
889 unsigned i;
890 tree idx;
891
892 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
893 if (tree_contains_chrecs (idx, NULL))
894 return false;
895
896 return true;
897 }
898
899 /* Frees data reference DR. */
900
901 void
902 free_data_ref (data_reference_p dr)
903 {
904 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
905 free (dr);
906 }
907
908 /* Analyzes memory reference MEMREF accessed in STMT. The reference
909 is read if IS_READ is true, write otherwise. Returns the
910 data_reference description of MEMREF. NEST is the outermost loop of the
911 loop nest in that the reference should be analyzed. */
912
913 struct data_reference *
914 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
915 {
916 struct data_reference *dr;
917
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 {
920 fprintf (dump_file, "Creating dr for ");
921 print_generic_expr (dump_file, memref, TDF_SLIM);
922 fprintf (dump_file, "\n");
923 }
924
925 dr = XCNEW (struct data_reference);
926 DR_STMT (dr) = stmt;
927 DR_REF (dr) = memref;
928 DR_IS_READ (dr) = is_read;
929
930 dr_analyze_innermost (dr);
931 dr_analyze_indices (dr, nest);
932 dr_analyze_alias (dr);
933
934 if (dump_file && (dump_flags & TDF_DETAILS))
935 {
936 fprintf (dump_file, "\tbase_address: ");
937 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
938 fprintf (dump_file, "\n\toffset from base address: ");
939 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
940 fprintf (dump_file, "\n\tconstant offset from base address: ");
941 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
942 fprintf (dump_file, "\n\tstep: ");
943 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
944 fprintf (dump_file, "\n\taligned to: ");
945 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
946 fprintf (dump_file, "\n\tbase_object: ");
947 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
948 fprintf (dump_file, "\n");
949 }
950
951 return dr;
952 }
953
954 /* Returns true if FNA == FNB. */
955
956 static bool
957 affine_function_equal_p (affine_fn fna, affine_fn fnb)
958 {
959 unsigned i, n = VEC_length (tree, fna);
960
961 if (n != VEC_length (tree, fnb))
962 return false;
963
964 for (i = 0; i < n; i++)
965 if (!operand_equal_p (VEC_index (tree, fna, i),
966 VEC_index (tree, fnb, i), 0))
967 return false;
968
969 return true;
970 }
971
972 /* If all the functions in CF are the same, returns one of them,
973 otherwise returns NULL. */
974
975 static affine_fn
976 common_affine_function (conflict_function *cf)
977 {
978 unsigned i;
979 affine_fn comm;
980
981 if (!CF_NONTRIVIAL_P (cf))
982 return NULL;
983
984 comm = cf->fns[0];
985
986 for (i = 1; i < cf->n; i++)
987 if (!affine_function_equal_p (comm, cf->fns[i]))
988 return NULL;
989
990 return comm;
991 }
992
993 /* Returns the base of the affine function FN. */
994
995 static tree
996 affine_function_base (affine_fn fn)
997 {
998 return VEC_index (tree, fn, 0);
999 }
1000
1001 /* Returns true if FN is a constant. */
1002
1003 static bool
1004 affine_function_constant_p (affine_fn fn)
1005 {
1006 unsigned i;
1007 tree coef;
1008
1009 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1010 if (!integer_zerop (coef))
1011 return false;
1012
1013 return true;
1014 }
1015
1016 /* Returns true if FN is the zero constant function. */
1017
1018 static bool
1019 affine_function_zero_p (affine_fn fn)
1020 {
1021 return (integer_zerop (affine_function_base (fn))
1022 && affine_function_constant_p (fn));
1023 }
1024
1025 /* Returns a signed integer type with the largest precision from TA
1026 and TB. */
1027
1028 static tree
1029 signed_type_for_types (tree ta, tree tb)
1030 {
1031 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1032 return signed_type_for (ta);
1033 else
1034 return signed_type_for (tb);
1035 }
1036
1037 /* Applies operation OP on affine functions FNA and FNB, and returns the
1038 result. */
1039
1040 static affine_fn
1041 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1042 {
1043 unsigned i, n, m;
1044 affine_fn ret;
1045 tree coef;
1046
1047 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1048 {
1049 n = VEC_length (tree, fna);
1050 m = VEC_length (tree, fnb);
1051 }
1052 else
1053 {
1054 n = VEC_length (tree, fnb);
1055 m = VEC_length (tree, fna);
1056 }
1057
1058 ret = VEC_alloc (tree, heap, m);
1059 for (i = 0; i < n; i++)
1060 {
1061 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1062 TREE_TYPE (VEC_index (tree, fnb, i)));
1063
1064 VEC_quick_push (tree, ret,
1065 fold_build2 (op, type,
1066 VEC_index (tree, fna, i),
1067 VEC_index (tree, fnb, i)));
1068 }
1069
1070 for (; VEC_iterate (tree, fna, i, coef); i++)
1071 VEC_quick_push (tree, ret,
1072 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1073 coef, integer_zero_node));
1074 for (; VEC_iterate (tree, fnb, i, coef); i++)
1075 VEC_quick_push (tree, ret,
1076 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1077 integer_zero_node, coef));
1078
1079 return ret;
1080 }
1081
1082 /* Returns the sum of affine functions FNA and FNB. */
1083
1084 static affine_fn
1085 affine_fn_plus (affine_fn fna, affine_fn fnb)
1086 {
1087 return affine_fn_op (PLUS_EXPR, fna, fnb);
1088 }
1089
1090 /* Returns the difference of affine functions FNA and FNB. */
1091
1092 static affine_fn
1093 affine_fn_minus (affine_fn fna, affine_fn fnb)
1094 {
1095 return affine_fn_op (MINUS_EXPR, fna, fnb);
1096 }
1097
1098 /* Frees affine function FN. */
1099
1100 static void
1101 affine_fn_free (affine_fn fn)
1102 {
1103 VEC_free (tree, heap, fn);
1104 }
1105
1106 /* Determine for each subscript in the data dependence relation DDR
1107 the distance. */
1108
1109 static void
1110 compute_subscript_distance (struct data_dependence_relation *ddr)
1111 {
1112 conflict_function *cf_a, *cf_b;
1113 affine_fn fn_a, fn_b, diff;
1114
1115 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1116 {
1117 unsigned int i;
1118
1119 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1120 {
1121 struct subscript *subscript;
1122
1123 subscript = DDR_SUBSCRIPT (ddr, i);
1124 cf_a = SUB_CONFLICTS_IN_A (subscript);
1125 cf_b = SUB_CONFLICTS_IN_B (subscript);
1126
1127 fn_a = common_affine_function (cf_a);
1128 fn_b = common_affine_function (cf_b);
1129 if (!fn_a || !fn_b)
1130 {
1131 SUB_DISTANCE (subscript) = chrec_dont_know;
1132 return;
1133 }
1134 diff = affine_fn_minus (fn_a, fn_b);
1135
1136 if (affine_function_constant_p (diff))
1137 SUB_DISTANCE (subscript) = affine_function_base (diff);
1138 else
1139 SUB_DISTANCE (subscript) = chrec_dont_know;
1140
1141 affine_fn_free (diff);
1142 }
1143 }
1144 }
1145
1146 /* Returns the conflict function for "unknown". */
1147
1148 static conflict_function *
1149 conflict_fn_not_known (void)
1150 {
1151 conflict_function *fn = XCNEW (conflict_function);
1152 fn->n = NOT_KNOWN;
1153
1154 return fn;
1155 }
1156
1157 /* Returns the conflict function for "independent". */
1158
1159 static conflict_function *
1160 conflict_fn_no_dependence (void)
1161 {
1162 conflict_function *fn = XCNEW (conflict_function);
1163 fn->n = NO_DEPENDENCE;
1164
1165 return fn;
1166 }
1167
1168 /* Returns true if the address of OBJ is invariant in LOOP. */
1169
1170 static bool
1171 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1172 {
1173 while (handled_component_p (obj))
1174 {
1175 if (TREE_CODE (obj) == ARRAY_REF)
1176 {
1177 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1178 need to check the stride and the lower bound of the reference. */
1179 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1180 loop->num)
1181 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1182 loop->num))
1183 return false;
1184 }
1185 else if (TREE_CODE (obj) == COMPONENT_REF)
1186 {
1187 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1188 loop->num))
1189 return false;
1190 }
1191 obj = TREE_OPERAND (obj, 0);
1192 }
1193
1194 if (!INDIRECT_REF_P (obj))
1195 return true;
1196
1197 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1198 loop->num);
1199 }
1200
1201 /* Returns true if A and B are accesses to different objects, or to different
1202 fields of the same object. */
1203
1204 static bool
1205 disjoint_objects_p (tree a, tree b)
1206 {
1207 tree base_a, base_b;
1208 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1209 bool ret;
1210
1211 base_a = get_base_address (a);
1212 base_b = get_base_address (b);
1213
1214 if (DECL_P (base_a)
1215 && DECL_P (base_b)
1216 && base_a != base_b)
1217 return true;
1218
1219 if (!operand_equal_p (base_a, base_b, 0))
1220 return false;
1221
1222 /* Compare the component references of A and B. We must start from the inner
1223 ones, so record them to the vector first. */
1224 while (handled_component_p (a))
1225 {
1226 VEC_safe_push (tree, heap, comp_a, a);
1227 a = TREE_OPERAND (a, 0);
1228 }
1229 while (handled_component_p (b))
1230 {
1231 VEC_safe_push (tree, heap, comp_b, b);
1232 b = TREE_OPERAND (b, 0);
1233 }
1234
1235 ret = false;
1236 while (1)
1237 {
1238 if (VEC_length (tree, comp_a) == 0
1239 || VEC_length (tree, comp_b) == 0)
1240 break;
1241
1242 a = VEC_pop (tree, comp_a);
1243 b = VEC_pop (tree, comp_b);
1244
1245 /* Real and imaginary part of a variable do not alias. */
1246 if ((TREE_CODE (a) == REALPART_EXPR
1247 && TREE_CODE (b) == IMAGPART_EXPR)
1248 || (TREE_CODE (a) == IMAGPART_EXPR
1249 && TREE_CODE (b) == REALPART_EXPR))
1250 {
1251 ret = true;
1252 break;
1253 }
1254
1255 if (TREE_CODE (a) != TREE_CODE (b))
1256 break;
1257
1258 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1259 DR_BASE_OBJECT are always zero. */
1260 if (TREE_CODE (a) == ARRAY_REF)
1261 continue;
1262 else if (TREE_CODE (a) == COMPONENT_REF)
1263 {
1264 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1265 continue;
1266
1267 /* Different fields of unions may overlap. */
1268 base_a = TREE_OPERAND (a, 0);
1269 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1270 break;
1271
1272 /* Different fields of structures cannot. */
1273 ret = true;
1274 break;
1275 }
1276 else
1277 break;
1278 }
1279
1280 VEC_free (tree, heap, comp_a);
1281 VEC_free (tree, heap, comp_b);
1282
1283 return ret;
1284 }
1285
1286 /* Returns false if we can prove that data references A and B do not alias,
1287 true otherwise. */
1288
1289 bool
1290 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1291 {
1292 const_tree addr_a = DR_BASE_ADDRESS (a);
1293 const_tree addr_b = DR_BASE_ADDRESS (b);
1294 const_tree type_a, type_b;
1295 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1296
1297 /* If the accessed objects are disjoint, the memory references do not
1298 alias. */
1299 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1300 return false;
1301
1302 /* Query the alias oracle. */
1303 if (!DR_IS_READ (a) && !DR_IS_READ (b))
1304 {
1305 if (!refs_output_dependent_p (DR_REF (a), DR_REF (b)))
1306 return false;
1307 }
1308 else if (DR_IS_READ (a) && !DR_IS_READ (b))
1309 {
1310 if (!refs_anti_dependent_p (DR_REF (a), DR_REF (b)))
1311 return false;
1312 }
1313 else if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1314 return false;
1315
1316 if (!addr_a || !addr_b)
1317 return true;
1318
1319 /* If the references are based on different static objects, they cannot
1320 alias (PTA should be able to disambiguate such accesses, but often
1321 it fails to). */
1322 if (TREE_CODE (addr_a) == ADDR_EXPR
1323 && TREE_CODE (addr_b) == ADDR_EXPR)
1324 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1325
1326 /* An instruction writing through a restricted pointer is "independent" of any
1327 instruction reading or writing through a different restricted pointer,
1328 in the same block/scope. */
1329
1330 type_a = TREE_TYPE (addr_a);
1331 type_b = TREE_TYPE (addr_b);
1332 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1333
1334 if (TREE_CODE (addr_a) == SSA_NAME)
1335 decl_a = SSA_NAME_VAR (addr_a);
1336 if (TREE_CODE (addr_b) == SSA_NAME)
1337 decl_b = SSA_NAME_VAR (addr_b);
1338
1339 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1340 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1341 && decl_a && DECL_P (decl_a)
1342 && decl_b && DECL_P (decl_b)
1343 && decl_a != decl_b
1344 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1345 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1346 return false;
1347
1348 return true;
1349 }
1350
1351 static void compute_self_dependence (struct data_dependence_relation *);
1352
1353 /* Initialize a data dependence relation between data accesses A and
1354 B. NB_LOOPS is the number of loops surrounding the references: the
1355 size of the classic distance/direction vectors. */
1356
1357 static struct data_dependence_relation *
1358 initialize_data_dependence_relation (struct data_reference *a,
1359 struct data_reference *b,
1360 VEC (loop_p, heap) *loop_nest)
1361 {
1362 struct data_dependence_relation *res;
1363 unsigned int i;
1364
1365 res = XNEW (struct data_dependence_relation);
1366 DDR_A (res) = a;
1367 DDR_B (res) = b;
1368 DDR_LOOP_NEST (res) = NULL;
1369 DDR_REVERSED_P (res) = false;
1370 DDR_SUBSCRIPTS (res) = NULL;
1371 DDR_DIR_VECTS (res) = NULL;
1372 DDR_DIST_VECTS (res) = NULL;
1373
1374 if (a == NULL || b == NULL)
1375 {
1376 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1377 return res;
1378 }
1379
1380 /* If the data references do not alias, then they are independent. */
1381 if (!dr_may_alias_p (a, b))
1382 {
1383 DDR_ARE_DEPENDENT (res) = chrec_known;
1384 return res;
1385 }
1386
1387 /* When the references are exactly the same, don't spend time doing
1388 the data dependence tests, just initialize the ddr and return. */
1389 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1390 {
1391 DDR_AFFINE_P (res) = true;
1392 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1393 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1394 DDR_LOOP_NEST (res) = loop_nest;
1395 DDR_INNER_LOOP (res) = 0;
1396 DDR_SELF_REFERENCE (res) = true;
1397 compute_self_dependence (res);
1398 return res;
1399 }
1400
1401 /* If the references do not access the same object, we do not know
1402 whether they alias or not. */
1403 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1404 {
1405 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1406 return res;
1407 }
1408
1409 /* If the base of the object is not invariant in the loop nest, we cannot
1410 analyze it. TODO -- in fact, it would suffice to record that there may
1411 be arbitrary dependences in the loops where the base object varies. */
1412 if (loop_nest
1413 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1414 DR_BASE_OBJECT (a)))
1415 {
1416 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1417 return res;
1418 }
1419
1420 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1421
1422 DDR_AFFINE_P (res) = true;
1423 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1424 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1425 DDR_LOOP_NEST (res) = loop_nest;
1426 DDR_INNER_LOOP (res) = 0;
1427 DDR_SELF_REFERENCE (res) = false;
1428
1429 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1430 {
1431 struct subscript *subscript;
1432
1433 subscript = XNEW (struct subscript);
1434 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1435 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1436 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1437 SUB_DISTANCE (subscript) = chrec_dont_know;
1438 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1439 }
1440
1441 return res;
1442 }
1443
1444 /* Frees memory used by the conflict function F. */
1445
1446 static void
1447 free_conflict_function (conflict_function *f)
1448 {
1449 unsigned i;
1450
1451 if (CF_NONTRIVIAL_P (f))
1452 {
1453 for (i = 0; i < f->n; i++)
1454 affine_fn_free (f->fns[i]);
1455 }
1456 free (f);
1457 }
1458
1459 /* Frees memory used by SUBSCRIPTS. */
1460
1461 static void
1462 free_subscripts (VEC (subscript_p, heap) *subscripts)
1463 {
1464 unsigned i;
1465 subscript_p s;
1466
1467 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1468 {
1469 free_conflict_function (s->conflicting_iterations_in_a);
1470 free_conflict_function (s->conflicting_iterations_in_b);
1471 free (s);
1472 }
1473 VEC_free (subscript_p, heap, subscripts);
1474 }
1475
1476 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1477 description. */
1478
1479 static inline void
1480 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1481 tree chrec)
1482 {
1483 if (dump_file && (dump_flags & TDF_DETAILS))
1484 {
1485 fprintf (dump_file, "(dependence classified: ");
1486 print_generic_expr (dump_file, chrec, 0);
1487 fprintf (dump_file, ")\n");
1488 }
1489
1490 DDR_ARE_DEPENDENT (ddr) = chrec;
1491 free_subscripts (DDR_SUBSCRIPTS (ddr));
1492 DDR_SUBSCRIPTS (ddr) = NULL;
1493 }
1494
1495 /* The dependence relation DDR cannot be represented by a distance
1496 vector. */
1497
1498 static inline void
1499 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1500 {
1501 if (dump_file && (dump_flags & TDF_DETAILS))
1502 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1503
1504 DDR_AFFINE_P (ddr) = false;
1505 }
1506
1507 \f
1508
1509 /* This section contains the classic Banerjee tests. */
1510
1511 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1512 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1513
1514 static inline bool
1515 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1516 {
1517 return (evolution_function_is_constant_p (chrec_a)
1518 && evolution_function_is_constant_p (chrec_b));
1519 }
1520
1521 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1522 variable, i.e., if the SIV (Single Index Variable) test is true. */
1523
1524 static bool
1525 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1526 {
1527 if ((evolution_function_is_constant_p (chrec_a)
1528 && evolution_function_is_univariate_p (chrec_b))
1529 || (evolution_function_is_constant_p (chrec_b)
1530 && evolution_function_is_univariate_p (chrec_a)))
1531 return true;
1532
1533 if (evolution_function_is_univariate_p (chrec_a)
1534 && evolution_function_is_univariate_p (chrec_b))
1535 {
1536 switch (TREE_CODE (chrec_a))
1537 {
1538 case POLYNOMIAL_CHREC:
1539 switch (TREE_CODE (chrec_b))
1540 {
1541 case POLYNOMIAL_CHREC:
1542 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1543 return false;
1544
1545 default:
1546 return true;
1547 }
1548
1549 default:
1550 return true;
1551 }
1552 }
1553
1554 return false;
1555 }
1556
1557 /* Creates a conflict function with N dimensions. The affine functions
1558 in each dimension follow. */
1559
1560 static conflict_function *
1561 conflict_fn (unsigned n, ...)
1562 {
1563 unsigned i;
1564 conflict_function *ret = XCNEW (conflict_function);
1565 va_list ap;
1566
1567 gcc_assert (0 < n && n <= MAX_DIM);
1568 va_start(ap, n);
1569
1570 ret->n = n;
1571 for (i = 0; i < n; i++)
1572 ret->fns[i] = va_arg (ap, affine_fn);
1573 va_end(ap);
1574
1575 return ret;
1576 }
1577
1578 /* Returns constant affine function with value CST. */
1579
1580 static affine_fn
1581 affine_fn_cst (tree cst)
1582 {
1583 affine_fn fn = VEC_alloc (tree, heap, 1);
1584 VEC_quick_push (tree, fn, cst);
1585 return fn;
1586 }
1587
1588 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1589
1590 static affine_fn
1591 affine_fn_univar (tree cst, unsigned dim, tree coef)
1592 {
1593 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1594 unsigned i;
1595
1596 gcc_assert (dim > 0);
1597 VEC_quick_push (tree, fn, cst);
1598 for (i = 1; i < dim; i++)
1599 VEC_quick_push (tree, fn, integer_zero_node);
1600 VEC_quick_push (tree, fn, coef);
1601 return fn;
1602 }
1603
1604 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1605 *OVERLAPS_B are initialized to the functions that describe the
1606 relation between the elements accessed twice by CHREC_A and
1607 CHREC_B. For k >= 0, the following property is verified:
1608
1609 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1610
1611 static void
1612 analyze_ziv_subscript (tree chrec_a,
1613 tree chrec_b,
1614 conflict_function **overlaps_a,
1615 conflict_function **overlaps_b,
1616 tree *last_conflicts)
1617 {
1618 tree type, difference;
1619 dependence_stats.num_ziv++;
1620
1621 if (dump_file && (dump_flags & TDF_DETAILS))
1622 fprintf (dump_file, "(analyze_ziv_subscript \n");
1623
1624 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1625 chrec_a = chrec_convert (type, chrec_a, NULL);
1626 chrec_b = chrec_convert (type, chrec_b, NULL);
1627 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1628
1629 switch (TREE_CODE (difference))
1630 {
1631 case INTEGER_CST:
1632 if (integer_zerop (difference))
1633 {
1634 /* The difference is equal to zero: the accessed index
1635 overlaps for each iteration in the loop. */
1636 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1637 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1638 *last_conflicts = chrec_dont_know;
1639 dependence_stats.num_ziv_dependent++;
1640 }
1641 else
1642 {
1643 /* The accesses do not overlap. */
1644 *overlaps_a = conflict_fn_no_dependence ();
1645 *overlaps_b = conflict_fn_no_dependence ();
1646 *last_conflicts = integer_zero_node;
1647 dependence_stats.num_ziv_independent++;
1648 }
1649 break;
1650
1651 default:
1652 /* We're not sure whether the indexes overlap. For the moment,
1653 conservatively answer "don't know". */
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1655 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1656
1657 *overlaps_a = conflict_fn_not_known ();
1658 *overlaps_b = conflict_fn_not_known ();
1659 *last_conflicts = chrec_dont_know;
1660 dependence_stats.num_ziv_unimplemented++;
1661 break;
1662 }
1663
1664 if (dump_file && (dump_flags & TDF_DETAILS))
1665 fprintf (dump_file, ")\n");
1666 }
1667
1668 /* Sets NIT to the estimated number of executions of the statements in
1669 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1670 large as the number of iterations. If we have no reliable estimate,
1671 the function returns false, otherwise returns true. */
1672
1673 bool
1674 estimated_loop_iterations (struct loop *loop, bool conservative,
1675 double_int *nit)
1676 {
1677 estimate_numbers_of_iterations_loop (loop);
1678 if (conservative)
1679 {
1680 if (!loop->any_upper_bound)
1681 return false;
1682
1683 *nit = loop->nb_iterations_upper_bound;
1684 }
1685 else
1686 {
1687 if (!loop->any_estimate)
1688 return false;
1689
1690 *nit = loop->nb_iterations_estimate;
1691 }
1692
1693 return true;
1694 }
1695
1696 /* Similar to estimated_loop_iterations, but returns the estimate only
1697 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1698 on the number of iterations of LOOP could not be derived, returns -1. */
1699
1700 HOST_WIDE_INT
1701 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1702 {
1703 double_int nit;
1704 HOST_WIDE_INT hwi_nit;
1705
1706 if (!estimated_loop_iterations (loop, conservative, &nit))
1707 return -1;
1708
1709 if (!double_int_fits_in_shwi_p (nit))
1710 return -1;
1711 hwi_nit = double_int_to_shwi (nit);
1712
1713 return hwi_nit < 0 ? -1 : hwi_nit;
1714 }
1715
1716 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1717 and only if it fits to the int type. If this is not the case, or the
1718 estimate on the number of iterations of LOOP could not be derived, returns
1719 chrec_dont_know. */
1720
1721 static tree
1722 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1723 {
1724 double_int nit;
1725 tree type;
1726
1727 if (!estimated_loop_iterations (loop, conservative, &nit))
1728 return chrec_dont_know;
1729
1730 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1731 if (!double_int_fits_to_tree_p (type, nit))
1732 return chrec_dont_know;
1733
1734 return double_int_to_tree (type, nit);
1735 }
1736
1737 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1738 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1739 *OVERLAPS_B are initialized to the functions that describe the
1740 relation between the elements accessed twice by CHREC_A and
1741 CHREC_B. For k >= 0, the following property is verified:
1742
1743 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1744
1745 static void
1746 analyze_siv_subscript_cst_affine (tree chrec_a,
1747 tree chrec_b,
1748 conflict_function **overlaps_a,
1749 conflict_function **overlaps_b,
1750 tree *last_conflicts)
1751 {
1752 bool value0, value1, value2;
1753 tree type, difference, tmp;
1754
1755 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1756 chrec_a = chrec_convert (type, chrec_a, NULL);
1757 chrec_b = chrec_convert (type, chrec_b, NULL);
1758 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1759
1760 if (!chrec_is_positive (initial_condition (difference), &value0))
1761 {
1762 if (dump_file && (dump_flags & TDF_DETAILS))
1763 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1764
1765 dependence_stats.num_siv_unimplemented++;
1766 *overlaps_a = conflict_fn_not_known ();
1767 *overlaps_b = conflict_fn_not_known ();
1768 *last_conflicts = chrec_dont_know;
1769 return;
1770 }
1771 else
1772 {
1773 if (value0 == false)
1774 {
1775 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1776 {
1777 if (dump_file && (dump_flags & TDF_DETAILS))
1778 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1779
1780 *overlaps_a = conflict_fn_not_known ();
1781 *overlaps_b = conflict_fn_not_known ();
1782 *last_conflicts = chrec_dont_know;
1783 dependence_stats.num_siv_unimplemented++;
1784 return;
1785 }
1786 else
1787 {
1788 if (value1 == true)
1789 {
1790 /* Example:
1791 chrec_a = 12
1792 chrec_b = {10, +, 1}
1793 */
1794
1795 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1796 {
1797 HOST_WIDE_INT numiter;
1798 struct loop *loop = get_chrec_loop (chrec_b);
1799
1800 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1801 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1802 fold_build1 (ABS_EXPR, type, difference),
1803 CHREC_RIGHT (chrec_b));
1804 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1805 *last_conflicts = integer_one_node;
1806
1807
1808 /* Perform weak-zero siv test to see if overlap is
1809 outside the loop bounds. */
1810 numiter = estimated_loop_iterations_int (loop, false);
1811
1812 if (numiter >= 0
1813 && compare_tree_int (tmp, numiter) > 0)
1814 {
1815 free_conflict_function (*overlaps_a);
1816 free_conflict_function (*overlaps_b);
1817 *overlaps_a = conflict_fn_no_dependence ();
1818 *overlaps_b = conflict_fn_no_dependence ();
1819 *last_conflicts = integer_zero_node;
1820 dependence_stats.num_siv_independent++;
1821 return;
1822 }
1823 dependence_stats.num_siv_dependent++;
1824 return;
1825 }
1826
1827 /* When the step does not divide the difference, there are
1828 no overlaps. */
1829 else
1830 {
1831 *overlaps_a = conflict_fn_no_dependence ();
1832 *overlaps_b = conflict_fn_no_dependence ();
1833 *last_conflicts = integer_zero_node;
1834 dependence_stats.num_siv_independent++;
1835 return;
1836 }
1837 }
1838
1839 else
1840 {
1841 /* Example:
1842 chrec_a = 12
1843 chrec_b = {10, +, -1}
1844
1845 In this case, chrec_a will not overlap with chrec_b. */
1846 *overlaps_a = conflict_fn_no_dependence ();
1847 *overlaps_b = conflict_fn_no_dependence ();
1848 *last_conflicts = integer_zero_node;
1849 dependence_stats.num_siv_independent++;
1850 return;
1851 }
1852 }
1853 }
1854 else
1855 {
1856 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1857 {
1858 if (dump_file && (dump_flags & TDF_DETAILS))
1859 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1860
1861 *overlaps_a = conflict_fn_not_known ();
1862 *overlaps_b = conflict_fn_not_known ();
1863 *last_conflicts = chrec_dont_know;
1864 dependence_stats.num_siv_unimplemented++;
1865 return;
1866 }
1867 else
1868 {
1869 if (value2 == false)
1870 {
1871 /* Example:
1872 chrec_a = 3
1873 chrec_b = {10, +, -1}
1874 */
1875 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1876 {
1877 HOST_WIDE_INT numiter;
1878 struct loop *loop = get_chrec_loop (chrec_b);
1879
1880 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1881 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1882 CHREC_RIGHT (chrec_b));
1883 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1884 *last_conflicts = integer_one_node;
1885
1886 /* Perform weak-zero siv test to see if overlap is
1887 outside the loop bounds. */
1888 numiter = estimated_loop_iterations_int (loop, false);
1889
1890 if (numiter >= 0
1891 && compare_tree_int (tmp, numiter) > 0)
1892 {
1893 free_conflict_function (*overlaps_a);
1894 free_conflict_function (*overlaps_b);
1895 *overlaps_a = conflict_fn_no_dependence ();
1896 *overlaps_b = conflict_fn_no_dependence ();
1897 *last_conflicts = integer_zero_node;
1898 dependence_stats.num_siv_independent++;
1899 return;
1900 }
1901 dependence_stats.num_siv_dependent++;
1902 return;
1903 }
1904
1905 /* When the step does not divide the difference, there
1906 are no overlaps. */
1907 else
1908 {
1909 *overlaps_a = conflict_fn_no_dependence ();
1910 *overlaps_b = conflict_fn_no_dependence ();
1911 *last_conflicts = integer_zero_node;
1912 dependence_stats.num_siv_independent++;
1913 return;
1914 }
1915 }
1916 else
1917 {
1918 /* Example:
1919 chrec_a = 3
1920 chrec_b = {4, +, 1}
1921
1922 In this case, chrec_a will not overlap with chrec_b. */
1923 *overlaps_a = conflict_fn_no_dependence ();
1924 *overlaps_b = conflict_fn_no_dependence ();
1925 *last_conflicts = integer_zero_node;
1926 dependence_stats.num_siv_independent++;
1927 return;
1928 }
1929 }
1930 }
1931 }
1932 }
1933
1934 /* Helper recursive function for initializing the matrix A. Returns
1935 the initial value of CHREC. */
1936
1937 static tree
1938 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1939 {
1940 gcc_assert (chrec);
1941
1942 switch (TREE_CODE (chrec))
1943 {
1944 case POLYNOMIAL_CHREC:
1945 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1946
1947 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1948 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1949
1950 case PLUS_EXPR:
1951 case MULT_EXPR:
1952 case MINUS_EXPR:
1953 {
1954 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1955 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1956
1957 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1958 }
1959
1960 case NOP_EXPR:
1961 {
1962 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1963 return chrec_convert (chrec_type (chrec), op, NULL);
1964 }
1965
1966 case BIT_NOT_EXPR:
1967 {
1968 /* Handle ~X as -1 - X. */
1969 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1970 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1971 build_int_cst (TREE_TYPE (chrec), -1), op);
1972 }
1973
1974 case INTEGER_CST:
1975 return chrec;
1976
1977 default:
1978 gcc_unreachable ();
1979 return NULL_TREE;
1980 }
1981 }
1982
1983 #define FLOOR_DIV(x,y) ((x) / (y))
1984
1985 /* Solves the special case of the Diophantine equation:
1986 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1987
1988 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1989 number of iterations that loops X and Y run. The overlaps will be
1990 constructed as evolutions in dimension DIM. */
1991
1992 static void
1993 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1994 affine_fn *overlaps_a,
1995 affine_fn *overlaps_b,
1996 tree *last_conflicts, int dim)
1997 {
1998 if (((step_a > 0 && step_b > 0)
1999 || (step_a < 0 && step_b < 0)))
2000 {
2001 int step_overlaps_a, step_overlaps_b;
2002 int gcd_steps_a_b, last_conflict, tau2;
2003
2004 gcd_steps_a_b = gcd (step_a, step_b);
2005 step_overlaps_a = step_b / gcd_steps_a_b;
2006 step_overlaps_b = step_a / gcd_steps_a_b;
2007
2008 if (niter > 0)
2009 {
2010 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2011 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2012 last_conflict = tau2;
2013 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2014 }
2015 else
2016 *last_conflicts = chrec_dont_know;
2017
2018 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2019 build_int_cst (NULL_TREE,
2020 step_overlaps_a));
2021 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2022 build_int_cst (NULL_TREE,
2023 step_overlaps_b));
2024 }
2025
2026 else
2027 {
2028 *overlaps_a = affine_fn_cst (integer_zero_node);
2029 *overlaps_b = affine_fn_cst (integer_zero_node);
2030 *last_conflicts = integer_zero_node;
2031 }
2032 }
2033
2034 /* Solves the special case of a Diophantine equation where CHREC_A is
2035 an affine bivariate function, and CHREC_B is an affine univariate
2036 function. For example,
2037
2038 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2039
2040 has the following overlapping functions:
2041
2042 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2043 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2044 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2045
2046 FORNOW: This is a specialized implementation for a case occurring in
2047 a common benchmark. Implement the general algorithm. */
2048
2049 static void
2050 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2051 conflict_function **overlaps_a,
2052 conflict_function **overlaps_b,
2053 tree *last_conflicts)
2054 {
2055 bool xz_p, yz_p, xyz_p;
2056 int step_x, step_y, step_z;
2057 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2058 affine_fn overlaps_a_xz, overlaps_b_xz;
2059 affine_fn overlaps_a_yz, overlaps_b_yz;
2060 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2061 affine_fn ova1, ova2, ovb;
2062 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2063
2064 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2065 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2066 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2067
2068 niter_x =
2069 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2070 false);
2071 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2072 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2073
2074 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2075 {
2076 if (dump_file && (dump_flags & TDF_DETAILS))
2077 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2078
2079 *overlaps_a = conflict_fn_not_known ();
2080 *overlaps_b = conflict_fn_not_known ();
2081 *last_conflicts = chrec_dont_know;
2082 return;
2083 }
2084
2085 niter = MIN (niter_x, niter_z);
2086 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2087 &overlaps_a_xz,
2088 &overlaps_b_xz,
2089 &last_conflicts_xz, 1);
2090 niter = MIN (niter_y, niter_z);
2091 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2092 &overlaps_a_yz,
2093 &overlaps_b_yz,
2094 &last_conflicts_yz, 2);
2095 niter = MIN (niter_x, niter_z);
2096 niter = MIN (niter_y, niter);
2097 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2098 &overlaps_a_xyz,
2099 &overlaps_b_xyz,
2100 &last_conflicts_xyz, 3);
2101
2102 xz_p = !integer_zerop (last_conflicts_xz);
2103 yz_p = !integer_zerop (last_conflicts_yz);
2104 xyz_p = !integer_zerop (last_conflicts_xyz);
2105
2106 if (xz_p || yz_p || xyz_p)
2107 {
2108 ova1 = affine_fn_cst (integer_zero_node);
2109 ova2 = affine_fn_cst (integer_zero_node);
2110 ovb = affine_fn_cst (integer_zero_node);
2111 if (xz_p)
2112 {
2113 affine_fn t0 = ova1;
2114 affine_fn t2 = ovb;
2115
2116 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2117 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2118 affine_fn_free (t0);
2119 affine_fn_free (t2);
2120 *last_conflicts = last_conflicts_xz;
2121 }
2122 if (yz_p)
2123 {
2124 affine_fn t0 = ova2;
2125 affine_fn t2 = ovb;
2126
2127 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2128 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2129 affine_fn_free (t0);
2130 affine_fn_free (t2);
2131 *last_conflicts = last_conflicts_yz;
2132 }
2133 if (xyz_p)
2134 {
2135 affine_fn t0 = ova1;
2136 affine_fn t2 = ova2;
2137 affine_fn t4 = ovb;
2138
2139 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2140 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2141 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2142 affine_fn_free (t0);
2143 affine_fn_free (t2);
2144 affine_fn_free (t4);
2145 *last_conflicts = last_conflicts_xyz;
2146 }
2147 *overlaps_a = conflict_fn (2, ova1, ova2);
2148 *overlaps_b = conflict_fn (1, ovb);
2149 }
2150 else
2151 {
2152 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2153 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2154 *last_conflicts = integer_zero_node;
2155 }
2156
2157 affine_fn_free (overlaps_a_xz);
2158 affine_fn_free (overlaps_b_xz);
2159 affine_fn_free (overlaps_a_yz);
2160 affine_fn_free (overlaps_b_yz);
2161 affine_fn_free (overlaps_a_xyz);
2162 affine_fn_free (overlaps_b_xyz);
2163 }
2164
2165 /* Determines the overlapping elements due to accesses CHREC_A and
2166 CHREC_B, that are affine functions. This function cannot handle
2167 symbolic evolution functions, ie. when initial conditions are
2168 parameters, because it uses lambda matrices of integers. */
2169
2170 static void
2171 analyze_subscript_affine_affine (tree chrec_a,
2172 tree chrec_b,
2173 conflict_function **overlaps_a,
2174 conflict_function **overlaps_b,
2175 tree *last_conflicts)
2176 {
2177 unsigned nb_vars_a, nb_vars_b, dim;
2178 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2179 lambda_matrix A, U, S;
2180 struct obstack scratch_obstack;
2181
2182 if (eq_evolutions_p (chrec_a, chrec_b))
2183 {
2184 /* The accessed index overlaps for each iteration in the
2185 loop. */
2186 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2187 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2188 *last_conflicts = chrec_dont_know;
2189 return;
2190 }
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2193
2194 /* For determining the initial intersection, we have to solve a
2195 Diophantine equation. This is the most time consuming part.
2196
2197 For answering to the question: "Is there a dependence?" we have
2198 to prove that there exists a solution to the Diophantine
2199 equation, and that the solution is in the iteration domain,
2200 i.e. the solution is positive or zero, and that the solution
2201 happens before the upper bound loop.nb_iterations. Otherwise
2202 there is no dependence. This function outputs a description of
2203 the iterations that hold the intersections. */
2204
2205 nb_vars_a = nb_vars_in_chrec (chrec_a);
2206 nb_vars_b = nb_vars_in_chrec (chrec_b);
2207
2208 gcc_obstack_init (&scratch_obstack);
2209
2210 dim = nb_vars_a + nb_vars_b;
2211 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2212 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2213 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2214
2215 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2216 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2217 gamma = init_b - init_a;
2218
2219 /* Don't do all the hard work of solving the Diophantine equation
2220 when we already know the solution: for example,
2221 | {3, +, 1}_1
2222 | {3, +, 4}_2
2223 | gamma = 3 - 3 = 0.
2224 Then the first overlap occurs during the first iterations:
2225 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2226 */
2227 if (gamma == 0)
2228 {
2229 if (nb_vars_a == 1 && nb_vars_b == 1)
2230 {
2231 HOST_WIDE_INT step_a, step_b;
2232 HOST_WIDE_INT niter, niter_a, niter_b;
2233 affine_fn ova, ovb;
2234
2235 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2236 false);
2237 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2238 false);
2239 niter = MIN (niter_a, niter_b);
2240 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2241 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2242
2243 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2244 &ova, &ovb,
2245 last_conflicts, 1);
2246 *overlaps_a = conflict_fn (1, ova);
2247 *overlaps_b = conflict_fn (1, ovb);
2248 }
2249
2250 else if (nb_vars_a == 2 && nb_vars_b == 1)
2251 compute_overlap_steps_for_affine_1_2
2252 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2253
2254 else if (nb_vars_a == 1 && nb_vars_b == 2)
2255 compute_overlap_steps_for_affine_1_2
2256 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2257
2258 else
2259 {
2260 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2262 *overlaps_a = conflict_fn_not_known ();
2263 *overlaps_b = conflict_fn_not_known ();
2264 *last_conflicts = chrec_dont_know;
2265 }
2266 goto end_analyze_subs_aa;
2267 }
2268
2269 /* U.A = S */
2270 lambda_matrix_right_hermite (A, dim, 1, S, U);
2271
2272 if (S[0][0] < 0)
2273 {
2274 S[0][0] *= -1;
2275 lambda_matrix_row_negate (U, dim, 0);
2276 }
2277 gcd_alpha_beta = S[0][0];
2278
2279 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2280 but that is a quite strange case. Instead of ICEing, answer
2281 don't know. */
2282 if (gcd_alpha_beta == 0)
2283 {
2284 *overlaps_a = conflict_fn_not_known ();
2285 *overlaps_b = conflict_fn_not_known ();
2286 *last_conflicts = chrec_dont_know;
2287 goto end_analyze_subs_aa;
2288 }
2289
2290 /* The classic "gcd-test". */
2291 if (!int_divides_p (gcd_alpha_beta, gamma))
2292 {
2293 /* The "gcd-test" has determined that there is no integer
2294 solution, i.e. there is no dependence. */
2295 *overlaps_a = conflict_fn_no_dependence ();
2296 *overlaps_b = conflict_fn_no_dependence ();
2297 *last_conflicts = integer_zero_node;
2298 }
2299
2300 /* Both access functions are univariate. This includes SIV and MIV cases. */
2301 else if (nb_vars_a == 1 && nb_vars_b == 1)
2302 {
2303 /* Both functions should have the same evolution sign. */
2304 if (((A[0][0] > 0 && -A[1][0] > 0)
2305 || (A[0][0] < 0 && -A[1][0] < 0)))
2306 {
2307 /* The solutions are given by:
2308 |
2309 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2310 | [u21 u22] [y0]
2311
2312 For a given integer t. Using the following variables,
2313
2314 | i0 = u11 * gamma / gcd_alpha_beta
2315 | j0 = u12 * gamma / gcd_alpha_beta
2316 | i1 = u21
2317 | j1 = u22
2318
2319 the solutions are:
2320
2321 | x0 = i0 + i1 * t,
2322 | y0 = j0 + j1 * t. */
2323 HOST_WIDE_INT i0, j0, i1, j1;
2324
2325 i0 = U[0][0] * gamma / gcd_alpha_beta;
2326 j0 = U[0][1] * gamma / gcd_alpha_beta;
2327 i1 = U[1][0];
2328 j1 = U[1][1];
2329
2330 if ((i1 == 0 && i0 < 0)
2331 || (j1 == 0 && j0 < 0))
2332 {
2333 /* There is no solution.
2334 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2335 falls in here, but for the moment we don't look at the
2336 upper bound of the iteration domain. */
2337 *overlaps_a = conflict_fn_no_dependence ();
2338 *overlaps_b = conflict_fn_no_dependence ();
2339 *last_conflicts = integer_zero_node;
2340 goto end_analyze_subs_aa;
2341 }
2342
2343 if (i1 > 0 && j1 > 0)
2344 {
2345 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2346 (get_chrec_loop (chrec_a), false);
2347 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2348 (get_chrec_loop (chrec_b), false);
2349 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2350
2351 /* (X0, Y0) is a solution of the Diophantine equation:
2352 "chrec_a (X0) = chrec_b (Y0)". */
2353 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2354 CEIL (-j0, j1));
2355 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2356 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2357
2358 /* (X1, Y1) is the smallest positive solution of the eq
2359 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2360 first conflict occurs. */
2361 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2362 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2363 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2364
2365 if (niter > 0)
2366 {
2367 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2368 FLOOR_DIV (niter - j0, j1));
2369 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2370
2371 /* If the overlap occurs outside of the bounds of the
2372 loop, there is no dependence. */
2373 if (x1 >= niter || y1 >= niter)
2374 {
2375 *overlaps_a = conflict_fn_no_dependence ();
2376 *overlaps_b = conflict_fn_no_dependence ();
2377 *last_conflicts = integer_zero_node;
2378 goto end_analyze_subs_aa;
2379 }
2380 else
2381 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2382 }
2383 else
2384 *last_conflicts = chrec_dont_know;
2385
2386 *overlaps_a
2387 = conflict_fn (1,
2388 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2389 1,
2390 build_int_cst (NULL_TREE, i1)));
2391 *overlaps_b
2392 = conflict_fn (1,
2393 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2394 1,
2395 build_int_cst (NULL_TREE, j1)));
2396 }
2397 else
2398 {
2399 /* FIXME: For the moment, the upper bound of the
2400 iteration domain for i and j is not checked. */
2401 if (dump_file && (dump_flags & TDF_DETAILS))
2402 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2403 *overlaps_a = conflict_fn_not_known ();
2404 *overlaps_b = conflict_fn_not_known ();
2405 *last_conflicts = chrec_dont_know;
2406 }
2407 }
2408 else
2409 {
2410 if (dump_file && (dump_flags & TDF_DETAILS))
2411 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2412 *overlaps_a = conflict_fn_not_known ();
2413 *overlaps_b = conflict_fn_not_known ();
2414 *last_conflicts = chrec_dont_know;
2415 }
2416 }
2417 else
2418 {
2419 if (dump_file && (dump_flags & TDF_DETAILS))
2420 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2421 *overlaps_a = conflict_fn_not_known ();
2422 *overlaps_b = conflict_fn_not_known ();
2423 *last_conflicts = chrec_dont_know;
2424 }
2425
2426 end_analyze_subs_aa:
2427 obstack_free (&scratch_obstack, NULL);
2428 if (dump_file && (dump_flags & TDF_DETAILS))
2429 {
2430 fprintf (dump_file, " (overlaps_a = ");
2431 dump_conflict_function (dump_file, *overlaps_a);
2432 fprintf (dump_file, ")\n (overlaps_b = ");
2433 dump_conflict_function (dump_file, *overlaps_b);
2434 fprintf (dump_file, ")\n");
2435 fprintf (dump_file, ")\n");
2436 }
2437 }
2438
2439 /* Returns true when analyze_subscript_affine_affine can be used for
2440 determining the dependence relation between chrec_a and chrec_b,
2441 that contain symbols. This function modifies chrec_a and chrec_b
2442 such that the analysis result is the same, and such that they don't
2443 contain symbols, and then can safely be passed to the analyzer.
2444
2445 Example: The analysis of the following tuples of evolutions produce
2446 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2447 vs. {0, +, 1}_1
2448
2449 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2450 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2451 */
2452
2453 static bool
2454 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2455 {
2456 tree diff, type, left_a, left_b, right_b;
2457
2458 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2459 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2460 /* FIXME: For the moment not handled. Might be refined later. */
2461 return false;
2462
2463 type = chrec_type (*chrec_a);
2464 left_a = CHREC_LEFT (*chrec_a);
2465 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2466 diff = chrec_fold_minus (type, left_a, left_b);
2467
2468 if (!evolution_function_is_constant_p (diff))
2469 return false;
2470
2471 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2473
2474 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2475 diff, CHREC_RIGHT (*chrec_a));
2476 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2477 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2478 build_int_cst (type, 0),
2479 right_b);
2480 return true;
2481 }
2482
2483 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2484 *OVERLAPS_B are initialized to the functions that describe the
2485 relation between the elements accessed twice by CHREC_A and
2486 CHREC_B. For k >= 0, the following property is verified:
2487
2488 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2489
2490 static void
2491 analyze_siv_subscript (tree chrec_a,
2492 tree chrec_b,
2493 conflict_function **overlaps_a,
2494 conflict_function **overlaps_b,
2495 tree *last_conflicts,
2496 int loop_nest_num)
2497 {
2498 dependence_stats.num_siv++;
2499
2500 if (dump_file && (dump_flags & TDF_DETAILS))
2501 fprintf (dump_file, "(analyze_siv_subscript \n");
2502
2503 if (evolution_function_is_constant_p (chrec_a)
2504 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2505 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2506 overlaps_a, overlaps_b, last_conflicts);
2507
2508 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2509 && evolution_function_is_constant_p (chrec_b))
2510 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2511 overlaps_b, overlaps_a, last_conflicts);
2512
2513 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2514 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2515 {
2516 if (!chrec_contains_symbols (chrec_a)
2517 && !chrec_contains_symbols (chrec_b))
2518 {
2519 analyze_subscript_affine_affine (chrec_a, chrec_b,
2520 overlaps_a, overlaps_b,
2521 last_conflicts);
2522
2523 if (CF_NOT_KNOWN_P (*overlaps_a)
2524 || CF_NOT_KNOWN_P (*overlaps_b))
2525 dependence_stats.num_siv_unimplemented++;
2526 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2527 || CF_NO_DEPENDENCE_P (*overlaps_b))
2528 dependence_stats.num_siv_independent++;
2529 else
2530 dependence_stats.num_siv_dependent++;
2531 }
2532 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2533 &chrec_b))
2534 {
2535 analyze_subscript_affine_affine (chrec_a, chrec_b,
2536 overlaps_a, overlaps_b,
2537 last_conflicts);
2538
2539 if (CF_NOT_KNOWN_P (*overlaps_a)
2540 || CF_NOT_KNOWN_P (*overlaps_b))
2541 dependence_stats.num_siv_unimplemented++;
2542 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2543 || CF_NO_DEPENDENCE_P (*overlaps_b))
2544 dependence_stats.num_siv_independent++;
2545 else
2546 dependence_stats.num_siv_dependent++;
2547 }
2548 else
2549 goto siv_subscript_dontknow;
2550 }
2551
2552 else
2553 {
2554 siv_subscript_dontknow:;
2555 if (dump_file && (dump_flags & TDF_DETAILS))
2556 fprintf (dump_file, "siv test failed: unimplemented.\n");
2557 *overlaps_a = conflict_fn_not_known ();
2558 *overlaps_b = conflict_fn_not_known ();
2559 *last_conflicts = chrec_dont_know;
2560 dependence_stats.num_siv_unimplemented++;
2561 }
2562
2563 if (dump_file && (dump_flags & TDF_DETAILS))
2564 fprintf (dump_file, ")\n");
2565 }
2566
2567 /* Returns false if we can prove that the greatest common divisor of the steps
2568 of CHREC does not divide CST, false otherwise. */
2569
2570 static bool
2571 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2572 {
2573 HOST_WIDE_INT cd = 0, val;
2574 tree step;
2575
2576 if (!host_integerp (cst, 0))
2577 return true;
2578 val = tree_low_cst (cst, 0);
2579
2580 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2581 {
2582 step = CHREC_RIGHT (chrec);
2583 if (!host_integerp (step, 0))
2584 return true;
2585 cd = gcd (cd, tree_low_cst (step, 0));
2586 chrec = CHREC_LEFT (chrec);
2587 }
2588
2589 return val % cd == 0;
2590 }
2591
2592 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2593 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2594 functions that describe the relation between the elements accessed
2595 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2596 is verified:
2597
2598 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2599
2600 static void
2601 analyze_miv_subscript (tree chrec_a,
2602 tree chrec_b,
2603 conflict_function **overlaps_a,
2604 conflict_function **overlaps_b,
2605 tree *last_conflicts,
2606 struct loop *loop_nest)
2607 {
2608 /* FIXME: This is a MIV subscript, not yet handled.
2609 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2610 (A[i] vs. A[j]).
2611
2612 In the SIV test we had to solve a Diophantine equation with two
2613 variables. In the MIV case we have to solve a Diophantine
2614 equation with 2*n variables (if the subscript uses n IVs).
2615 */
2616 tree type, difference;
2617
2618 dependence_stats.num_miv++;
2619 if (dump_file && (dump_flags & TDF_DETAILS))
2620 fprintf (dump_file, "(analyze_miv_subscript \n");
2621
2622 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2623 chrec_a = chrec_convert (type, chrec_a, NULL);
2624 chrec_b = chrec_convert (type, chrec_b, NULL);
2625 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2626
2627 if (eq_evolutions_p (chrec_a, chrec_b))
2628 {
2629 /* Access functions are the same: all the elements are accessed
2630 in the same order. */
2631 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2632 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2633 *last_conflicts = estimated_loop_iterations_tree
2634 (get_chrec_loop (chrec_a), true);
2635 dependence_stats.num_miv_dependent++;
2636 }
2637
2638 else if (evolution_function_is_constant_p (difference)
2639 /* For the moment, the following is verified:
2640 evolution_function_is_affine_multivariate_p (chrec_a,
2641 loop_nest->num) */
2642 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2643 {
2644 /* testsuite/.../ssa-chrec-33.c
2645 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2646
2647 The difference is 1, and all the evolution steps are multiples
2648 of 2, consequently there are no overlapping elements. */
2649 *overlaps_a = conflict_fn_no_dependence ();
2650 *overlaps_b = conflict_fn_no_dependence ();
2651 *last_conflicts = integer_zero_node;
2652 dependence_stats.num_miv_independent++;
2653 }
2654
2655 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2656 && !chrec_contains_symbols (chrec_a)
2657 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2658 && !chrec_contains_symbols (chrec_b))
2659 {
2660 /* testsuite/.../ssa-chrec-35.c
2661 {0, +, 1}_2 vs. {0, +, 1}_3
2662 the overlapping elements are respectively located at iterations:
2663 {0, +, 1}_x and {0, +, 1}_x,
2664 in other words, we have the equality:
2665 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2666
2667 Other examples:
2668 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2669 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2670
2671 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2672 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2673 */
2674 analyze_subscript_affine_affine (chrec_a, chrec_b,
2675 overlaps_a, overlaps_b, last_conflicts);
2676
2677 if (CF_NOT_KNOWN_P (*overlaps_a)
2678 || CF_NOT_KNOWN_P (*overlaps_b))
2679 dependence_stats.num_miv_unimplemented++;
2680 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2681 || CF_NO_DEPENDENCE_P (*overlaps_b))
2682 dependence_stats.num_miv_independent++;
2683 else
2684 dependence_stats.num_miv_dependent++;
2685 }
2686
2687 else
2688 {
2689 /* When the analysis is too difficult, answer "don't know". */
2690 if (dump_file && (dump_flags & TDF_DETAILS))
2691 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2692
2693 *overlaps_a = conflict_fn_not_known ();
2694 *overlaps_b = conflict_fn_not_known ();
2695 *last_conflicts = chrec_dont_know;
2696 dependence_stats.num_miv_unimplemented++;
2697 }
2698
2699 if (dump_file && (dump_flags & TDF_DETAILS))
2700 fprintf (dump_file, ")\n");
2701 }
2702
2703 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2704 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2705 OVERLAP_ITERATIONS_B are initialized with two functions that
2706 describe the iterations that contain conflicting elements.
2707
2708 Remark: For an integer k >= 0, the following equality is true:
2709
2710 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2711 */
2712
2713 static void
2714 analyze_overlapping_iterations (tree chrec_a,
2715 tree chrec_b,
2716 conflict_function **overlap_iterations_a,
2717 conflict_function **overlap_iterations_b,
2718 tree *last_conflicts, struct loop *loop_nest)
2719 {
2720 unsigned int lnn = loop_nest->num;
2721
2722 dependence_stats.num_subscript_tests++;
2723
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 {
2726 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2727 fprintf (dump_file, " (chrec_a = ");
2728 print_generic_expr (dump_file, chrec_a, 0);
2729 fprintf (dump_file, ")\n (chrec_b = ");
2730 print_generic_expr (dump_file, chrec_b, 0);
2731 fprintf (dump_file, ")\n");
2732 }
2733
2734 if (chrec_a == NULL_TREE
2735 || chrec_b == NULL_TREE
2736 || chrec_contains_undetermined (chrec_a)
2737 || chrec_contains_undetermined (chrec_b))
2738 {
2739 dependence_stats.num_subscript_undetermined++;
2740
2741 *overlap_iterations_a = conflict_fn_not_known ();
2742 *overlap_iterations_b = conflict_fn_not_known ();
2743 }
2744
2745 /* If they are the same chrec, and are affine, they overlap
2746 on every iteration. */
2747 else if (eq_evolutions_p (chrec_a, chrec_b)
2748 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2749 {
2750 dependence_stats.num_same_subscript_function++;
2751 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2752 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2753 *last_conflicts = chrec_dont_know;
2754 }
2755
2756 /* If they aren't the same, and aren't affine, we can't do anything
2757 yet. */
2758 else if ((chrec_contains_symbols (chrec_a)
2759 || chrec_contains_symbols (chrec_b))
2760 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2761 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2762 {
2763 dependence_stats.num_subscript_undetermined++;
2764 *overlap_iterations_a = conflict_fn_not_known ();
2765 *overlap_iterations_b = conflict_fn_not_known ();
2766 }
2767
2768 else if (ziv_subscript_p (chrec_a, chrec_b))
2769 analyze_ziv_subscript (chrec_a, chrec_b,
2770 overlap_iterations_a, overlap_iterations_b,
2771 last_conflicts);
2772
2773 else if (siv_subscript_p (chrec_a, chrec_b))
2774 analyze_siv_subscript (chrec_a, chrec_b,
2775 overlap_iterations_a, overlap_iterations_b,
2776 last_conflicts, lnn);
2777
2778 else
2779 analyze_miv_subscript (chrec_a, chrec_b,
2780 overlap_iterations_a, overlap_iterations_b,
2781 last_conflicts, loop_nest);
2782
2783 if (dump_file && (dump_flags & TDF_DETAILS))
2784 {
2785 fprintf (dump_file, " (overlap_iterations_a = ");
2786 dump_conflict_function (dump_file, *overlap_iterations_a);
2787 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2788 dump_conflict_function (dump_file, *overlap_iterations_b);
2789 fprintf (dump_file, ")\n");
2790 fprintf (dump_file, ")\n");
2791 }
2792 }
2793
2794 /* Helper function for uniquely inserting distance vectors. */
2795
2796 static void
2797 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2798 {
2799 unsigned i;
2800 lambda_vector v;
2801
2802 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2803 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2804 return;
2805
2806 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2807 }
2808
2809 /* Helper function for uniquely inserting direction vectors. */
2810
2811 static void
2812 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2813 {
2814 unsigned i;
2815 lambda_vector v;
2816
2817 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2818 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2819 return;
2820
2821 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2822 }
2823
2824 /* Add a distance of 1 on all the loops outer than INDEX. If we
2825 haven't yet determined a distance for this outer loop, push a new
2826 distance vector composed of the previous distance, and a distance
2827 of 1 for this outer loop. Example:
2828
2829 | loop_1
2830 | loop_2
2831 | A[10]
2832 | endloop_2
2833 | endloop_1
2834
2835 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2836 save (0, 1), then we have to save (1, 0). */
2837
2838 static void
2839 add_outer_distances (struct data_dependence_relation *ddr,
2840 lambda_vector dist_v, int index)
2841 {
2842 /* For each outer loop where init_v is not set, the accesses are
2843 in dependence of distance 1 in the loop. */
2844 while (--index >= 0)
2845 {
2846 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2847 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2848 save_v[index] = 1;
2849 save_dist_v (ddr, save_v);
2850 }
2851 }
2852
2853 /* Return false when fail to represent the data dependence as a
2854 distance vector. INIT_B is set to true when a component has been
2855 added to the distance vector DIST_V. INDEX_CARRY is then set to
2856 the index in DIST_V that carries the dependence. */
2857
2858 static bool
2859 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2860 struct data_reference *ddr_a,
2861 struct data_reference *ddr_b,
2862 lambda_vector dist_v, bool *init_b,
2863 int *index_carry)
2864 {
2865 unsigned i;
2866 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2867
2868 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2869 {
2870 tree access_fn_a, access_fn_b;
2871 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2872
2873 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2874 {
2875 non_affine_dependence_relation (ddr);
2876 return false;
2877 }
2878
2879 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2880 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2881
2882 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2883 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2884 {
2885 int dist, index;
2886 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2887 DDR_LOOP_NEST (ddr));
2888 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2889 DDR_LOOP_NEST (ddr));
2890
2891 /* The dependence is carried by the outermost loop. Example:
2892 | loop_1
2893 | A[{4, +, 1}_1]
2894 | loop_2
2895 | A[{5, +, 1}_2]
2896 | endloop_2
2897 | endloop_1
2898 In this case, the dependence is carried by loop_1. */
2899 index = index_a < index_b ? index_a : index_b;
2900 *index_carry = MIN (index, *index_carry);
2901
2902 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2903 {
2904 non_affine_dependence_relation (ddr);
2905 return false;
2906 }
2907
2908 dist = int_cst_value (SUB_DISTANCE (subscript));
2909
2910 /* This is the subscript coupling test. If we have already
2911 recorded a distance for this loop (a distance coming from
2912 another subscript), it should be the same. For example,
2913 in the following code, there is no dependence:
2914
2915 | loop i = 0, N, 1
2916 | T[i+1][i] = ...
2917 | ... = T[i][i]
2918 | endloop
2919 */
2920 if (init_v[index] != 0 && dist_v[index] != dist)
2921 {
2922 finalize_ddr_dependent (ddr, chrec_known);
2923 return false;
2924 }
2925
2926 dist_v[index] = dist;
2927 init_v[index] = 1;
2928 *init_b = true;
2929 }
2930 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2931 {
2932 /* This can be for example an affine vs. constant dependence
2933 (T[i] vs. T[3]) that is not an affine dependence and is
2934 not representable as a distance vector. */
2935 non_affine_dependence_relation (ddr);
2936 return false;
2937 }
2938 }
2939
2940 return true;
2941 }
2942
2943 /* Return true when the DDR contains only constant access functions. */
2944
2945 static bool
2946 constant_access_functions (const struct data_dependence_relation *ddr)
2947 {
2948 unsigned i;
2949
2950 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2951 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2952 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2953 return false;
2954
2955 return true;
2956 }
2957
2958 /* Helper function for the case where DDR_A and DDR_B are the same
2959 multivariate access function with a constant step. For an example
2960 see pr34635-1.c. */
2961
2962 static void
2963 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2964 {
2965 int x_1, x_2;
2966 tree c_1 = CHREC_LEFT (c_2);
2967 tree c_0 = CHREC_LEFT (c_1);
2968 lambda_vector dist_v;
2969 int v1, v2, cd;
2970
2971 /* Polynomials with more than 2 variables are not handled yet. When
2972 the evolution steps are parameters, it is not possible to
2973 represent the dependence using classical distance vectors. */
2974 if (TREE_CODE (c_0) != INTEGER_CST
2975 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2976 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2977 {
2978 DDR_AFFINE_P (ddr) = false;
2979 return;
2980 }
2981
2982 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2983 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2984
2985 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2986 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2987 v1 = int_cst_value (CHREC_RIGHT (c_1));
2988 v2 = int_cst_value (CHREC_RIGHT (c_2));
2989 cd = gcd (v1, v2);
2990 v1 /= cd;
2991 v2 /= cd;
2992
2993 if (v2 < 0)
2994 {
2995 v2 = -v2;
2996 v1 = -v1;
2997 }
2998
2999 dist_v[x_1] = v2;
3000 dist_v[x_2] = -v1;
3001 save_dist_v (ddr, dist_v);
3002
3003 add_outer_distances (ddr, dist_v, x_1);
3004 }
3005
3006 /* Helper function for the case where DDR_A and DDR_B are the same
3007 access functions. */
3008
3009 static void
3010 add_other_self_distances (struct data_dependence_relation *ddr)
3011 {
3012 lambda_vector dist_v;
3013 unsigned i;
3014 int index_carry = DDR_NB_LOOPS (ddr);
3015
3016 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3017 {
3018 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3019
3020 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3021 {
3022 if (!evolution_function_is_univariate_p (access_fun))
3023 {
3024 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3025 {
3026 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3027 return;
3028 }
3029
3030 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3031
3032 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3033 add_multivariate_self_dist (ddr, access_fun);
3034 else
3035 /* The evolution step is not constant: it varies in
3036 the outer loop, so this cannot be represented by a
3037 distance vector. For example in pr34635.c the
3038 evolution is {0, +, {0, +, 4}_1}_2. */
3039 DDR_AFFINE_P (ddr) = false;
3040
3041 return;
3042 }
3043
3044 index_carry = MIN (index_carry,
3045 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3046 DDR_LOOP_NEST (ddr)));
3047 }
3048 }
3049
3050 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3051 add_outer_distances (ddr, dist_v, index_carry);
3052 }
3053
3054 static void
3055 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3056 {
3057 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3058
3059 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3060 save_dist_v (ddr, dist_v);
3061 }
3062
3063 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3064 is the case for example when access functions are the same and
3065 equal to a constant, as in:
3066
3067 | loop_1
3068 | A[3] = ...
3069 | ... = A[3]
3070 | endloop_1
3071
3072 in which case the distance vectors are (0) and (1). */
3073
3074 static void
3075 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3076 {
3077 unsigned i, j;
3078
3079 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3080 {
3081 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3082 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3083 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3084
3085 for (j = 0; j < ca->n; j++)
3086 if (affine_function_zero_p (ca->fns[j]))
3087 {
3088 insert_innermost_unit_dist_vector (ddr);
3089 return;
3090 }
3091
3092 for (j = 0; j < cb->n; j++)
3093 if (affine_function_zero_p (cb->fns[j]))
3094 {
3095 insert_innermost_unit_dist_vector (ddr);
3096 return;
3097 }
3098 }
3099 }
3100
3101 /* Compute the classic per loop distance vector. DDR is the data
3102 dependence relation to build a vector from. Return false when fail
3103 to represent the data dependence as a distance vector. */
3104
3105 static bool
3106 build_classic_dist_vector (struct data_dependence_relation *ddr,
3107 struct loop *loop_nest)
3108 {
3109 bool init_b = false;
3110 int index_carry = DDR_NB_LOOPS (ddr);
3111 lambda_vector dist_v;
3112
3113 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3114 return false;
3115
3116 if (same_access_functions (ddr))
3117 {
3118 /* Save the 0 vector. */
3119 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3120 save_dist_v (ddr, dist_v);
3121
3122 if (constant_access_functions (ddr))
3123 add_distance_for_zero_overlaps (ddr);
3124
3125 if (DDR_NB_LOOPS (ddr) > 1)
3126 add_other_self_distances (ddr);
3127
3128 return true;
3129 }
3130
3131 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3132 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3133 dist_v, &init_b, &index_carry))
3134 return false;
3135
3136 /* Save the distance vector if we initialized one. */
3137 if (init_b)
3138 {
3139 /* Verify a basic constraint: classic distance vectors should
3140 always be lexicographically positive.
3141
3142 Data references are collected in the order of execution of
3143 the program, thus for the following loop
3144
3145 | for (i = 1; i < 100; i++)
3146 | for (j = 1; j < 100; j++)
3147 | {
3148 | t = T[j+1][i-1]; // A
3149 | T[j][i] = t + 2; // B
3150 | }
3151
3152 references are collected following the direction of the wind:
3153 A then B. The data dependence tests are performed also
3154 following this order, such that we're looking at the distance
3155 separating the elements accessed by A from the elements later
3156 accessed by B. But in this example, the distance returned by
3157 test_dep (A, B) is lexicographically negative (-1, 1), that
3158 means that the access A occurs later than B with respect to
3159 the outer loop, ie. we're actually looking upwind. In this
3160 case we solve test_dep (B, A) looking downwind to the
3161 lexicographically positive solution, that returns the
3162 distance vector (1, -1). */
3163 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3164 {
3165 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3166 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3167 loop_nest))
3168 return false;
3169 compute_subscript_distance (ddr);
3170 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3171 save_v, &init_b, &index_carry))
3172 return false;
3173 save_dist_v (ddr, save_v);
3174 DDR_REVERSED_P (ddr) = true;
3175
3176 /* In this case there is a dependence forward for all the
3177 outer loops:
3178
3179 | for (k = 1; k < 100; k++)
3180 | for (i = 1; i < 100; i++)
3181 | for (j = 1; j < 100; j++)
3182 | {
3183 | t = T[j+1][i-1]; // A
3184 | T[j][i] = t + 2; // B
3185 | }
3186
3187 the vectors are:
3188 (0, 1, -1)
3189 (1, 1, -1)
3190 (1, -1, 1)
3191 */
3192 if (DDR_NB_LOOPS (ddr) > 1)
3193 {
3194 add_outer_distances (ddr, save_v, index_carry);
3195 add_outer_distances (ddr, dist_v, index_carry);
3196 }
3197 }
3198 else
3199 {
3200 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3201 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3202
3203 if (DDR_NB_LOOPS (ddr) > 1)
3204 {
3205 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3206
3207 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3208 DDR_A (ddr), loop_nest))
3209 return false;
3210 compute_subscript_distance (ddr);
3211 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3212 opposite_v, &init_b,
3213 &index_carry))
3214 return false;
3215
3216 save_dist_v (ddr, save_v);
3217 add_outer_distances (ddr, dist_v, index_carry);
3218 add_outer_distances (ddr, opposite_v, index_carry);
3219 }
3220 else
3221 save_dist_v (ddr, save_v);
3222 }
3223 }
3224 else
3225 {
3226 /* There is a distance of 1 on all the outer loops: Example:
3227 there is a dependence of distance 1 on loop_1 for the array A.
3228
3229 | loop_1
3230 | A[5] = ...
3231 | endloop
3232 */
3233 add_outer_distances (ddr, dist_v,
3234 lambda_vector_first_nz (dist_v,
3235 DDR_NB_LOOPS (ddr), 0));
3236 }
3237
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 {
3240 unsigned i;
3241
3242 fprintf (dump_file, "(build_classic_dist_vector\n");
3243 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3244 {
3245 fprintf (dump_file, " dist_vector = (");
3246 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3247 DDR_NB_LOOPS (ddr));
3248 fprintf (dump_file, " )\n");
3249 }
3250 fprintf (dump_file, ")\n");
3251 }
3252
3253 return true;
3254 }
3255
3256 /* Return the direction for a given distance.
3257 FIXME: Computing dir this way is suboptimal, since dir can catch
3258 cases that dist is unable to represent. */
3259
3260 static inline enum data_dependence_direction
3261 dir_from_dist (int dist)
3262 {
3263 if (dist > 0)
3264 return dir_positive;
3265 else if (dist < 0)
3266 return dir_negative;
3267 else
3268 return dir_equal;
3269 }
3270
3271 /* Compute the classic per loop direction vector. DDR is the data
3272 dependence relation to build a vector from. */
3273
3274 static void
3275 build_classic_dir_vector (struct data_dependence_relation *ddr)
3276 {
3277 unsigned i, j;
3278 lambda_vector dist_v;
3279
3280 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3281 {
3282 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3283
3284 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3285 dir_v[j] = dir_from_dist (dist_v[j]);
3286
3287 save_dir_v (ddr, dir_v);
3288 }
3289 }
3290
3291 /* Helper function. Returns true when there is a dependence between
3292 data references DRA and DRB. */
3293
3294 static bool
3295 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3296 struct data_reference *dra,
3297 struct data_reference *drb,
3298 struct loop *loop_nest)
3299 {
3300 unsigned int i;
3301 tree last_conflicts;
3302 struct subscript *subscript;
3303
3304 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3305 i++)
3306 {
3307 conflict_function *overlaps_a, *overlaps_b;
3308
3309 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3310 DR_ACCESS_FN (drb, i),
3311 &overlaps_a, &overlaps_b,
3312 &last_conflicts, loop_nest);
3313
3314 if (CF_NOT_KNOWN_P (overlaps_a)
3315 || CF_NOT_KNOWN_P (overlaps_b))
3316 {
3317 finalize_ddr_dependent (ddr, chrec_dont_know);
3318 dependence_stats.num_dependence_undetermined++;
3319 free_conflict_function (overlaps_a);
3320 free_conflict_function (overlaps_b);
3321 return false;
3322 }
3323
3324 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3325 || CF_NO_DEPENDENCE_P (overlaps_b))
3326 {
3327 finalize_ddr_dependent (ddr, chrec_known);
3328 dependence_stats.num_dependence_independent++;
3329 free_conflict_function (overlaps_a);
3330 free_conflict_function (overlaps_b);
3331 return false;
3332 }
3333
3334 else
3335 {
3336 if (SUB_CONFLICTS_IN_A (subscript))
3337 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3338 if (SUB_CONFLICTS_IN_B (subscript))
3339 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3340
3341 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3342 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3343 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3344 }
3345 }
3346
3347 return true;
3348 }
3349
3350 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3351
3352 static void
3353 subscript_dependence_tester (struct data_dependence_relation *ddr,
3354 struct loop *loop_nest)
3355 {
3356
3357 if (dump_file && (dump_flags & TDF_DETAILS))
3358 fprintf (dump_file, "(subscript_dependence_tester \n");
3359
3360 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3361 dependence_stats.num_dependence_dependent++;
3362
3363 compute_subscript_distance (ddr);
3364 if (build_classic_dist_vector (ddr, loop_nest))
3365 build_classic_dir_vector (ddr);
3366
3367 if (dump_file && (dump_flags & TDF_DETAILS))
3368 fprintf (dump_file, ")\n");
3369 }
3370
3371 /* Returns true when all the access functions of A are affine or
3372 constant with respect to LOOP_NEST. */
3373
3374 static bool
3375 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3376 const struct loop *loop_nest)
3377 {
3378 unsigned int i;
3379 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3380 tree t;
3381
3382 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3383 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3384 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3385 return false;
3386
3387 return true;
3388 }
3389
3390 /* Initializes an equation for an OMEGA problem using the information
3391 contained in the ACCESS_FUN. Returns true when the operation
3392 succeeded.
3393
3394 PB is the omega constraint system.
3395 EQ is the number of the equation to be initialized.
3396 OFFSET is used for shifting the variables names in the constraints:
3397 a constrain is composed of 2 * the number of variables surrounding
3398 dependence accesses. OFFSET is set either to 0 for the first n variables,
3399 then it is set to n.
3400 ACCESS_FUN is expected to be an affine chrec. */
3401
3402 static bool
3403 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3404 unsigned int offset, tree access_fun,
3405 struct data_dependence_relation *ddr)
3406 {
3407 switch (TREE_CODE (access_fun))
3408 {
3409 case POLYNOMIAL_CHREC:
3410 {
3411 tree left = CHREC_LEFT (access_fun);
3412 tree right = CHREC_RIGHT (access_fun);
3413 int var = CHREC_VARIABLE (access_fun);
3414 unsigned var_idx;
3415
3416 if (TREE_CODE (right) != INTEGER_CST)
3417 return false;
3418
3419 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3420 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3421
3422 /* Compute the innermost loop index. */
3423 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3424
3425 if (offset == 0)
3426 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3427 += int_cst_value (right);
3428
3429 switch (TREE_CODE (left))
3430 {
3431 case POLYNOMIAL_CHREC:
3432 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3433
3434 case INTEGER_CST:
3435 pb->eqs[eq].coef[0] += int_cst_value (left);
3436 return true;
3437
3438 default:
3439 return false;
3440 }
3441 }
3442
3443 case INTEGER_CST:
3444 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3445 return true;
3446
3447 default:
3448 return false;
3449 }
3450 }
3451
3452 /* As explained in the comments preceding init_omega_for_ddr, we have
3453 to set up a system for each loop level, setting outer loops
3454 variation to zero, and current loop variation to positive or zero.
3455 Save each lexico positive distance vector. */
3456
3457 static void
3458 omega_extract_distance_vectors (omega_pb pb,
3459 struct data_dependence_relation *ddr)
3460 {
3461 int eq, geq;
3462 unsigned i, j;
3463 struct loop *loopi, *loopj;
3464 enum omega_result res;
3465
3466 /* Set a new problem for each loop in the nest. The basis is the
3467 problem that we have initialized until now. On top of this we
3468 add new constraints. */
3469 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3470 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3471 {
3472 int dist = 0;
3473 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3474 DDR_NB_LOOPS (ddr));
3475
3476 omega_copy_problem (copy, pb);
3477
3478 /* For all the outer loops "loop_j", add "dj = 0". */
3479 for (j = 0;
3480 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3481 {
3482 eq = omega_add_zero_eq (copy, omega_black);
3483 copy->eqs[eq].coef[j + 1] = 1;
3484 }
3485
3486 /* For "loop_i", add "0 <= di". */
3487 geq = omega_add_zero_geq (copy, omega_black);
3488 copy->geqs[geq].coef[i + 1] = 1;
3489
3490 /* Reduce the constraint system, and test that the current
3491 problem is feasible. */
3492 res = omega_simplify_problem (copy);
3493 if (res == omega_false
3494 || res == omega_unknown
3495 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3496 goto next_problem;
3497
3498 for (eq = 0; eq < copy->num_subs; eq++)
3499 if (copy->subs[eq].key == (int) i + 1)
3500 {
3501 dist = copy->subs[eq].coef[0];
3502 goto found_dist;
3503 }
3504
3505 if (dist == 0)
3506 {
3507 /* Reinitialize problem... */
3508 omega_copy_problem (copy, pb);
3509 for (j = 0;
3510 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3511 {
3512 eq = omega_add_zero_eq (copy, omega_black);
3513 copy->eqs[eq].coef[j + 1] = 1;
3514 }
3515
3516 /* ..., but this time "di = 1". */
3517 eq = omega_add_zero_eq (copy, omega_black);
3518 copy->eqs[eq].coef[i + 1] = 1;
3519 copy->eqs[eq].coef[0] = -1;
3520
3521 res = omega_simplify_problem (copy);
3522 if (res == omega_false
3523 || res == omega_unknown
3524 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3525 goto next_problem;
3526
3527 for (eq = 0; eq < copy->num_subs; eq++)
3528 if (copy->subs[eq].key == (int) i + 1)
3529 {
3530 dist = copy->subs[eq].coef[0];
3531 goto found_dist;
3532 }
3533 }
3534
3535 found_dist:;
3536 /* Save the lexicographically positive distance vector. */
3537 if (dist >= 0)
3538 {
3539 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3540 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3541
3542 dist_v[i] = dist;
3543
3544 for (eq = 0; eq < copy->num_subs; eq++)
3545 if (copy->subs[eq].key > 0)
3546 {
3547 dist = copy->subs[eq].coef[0];
3548 dist_v[copy->subs[eq].key - 1] = dist;
3549 }
3550
3551 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3552 dir_v[j] = dir_from_dist (dist_v[j]);
3553
3554 save_dist_v (ddr, dist_v);
3555 save_dir_v (ddr, dir_v);
3556 }
3557
3558 next_problem:;
3559 omega_free_problem (copy);
3560 }
3561 }
3562
3563 /* This is called for each subscript of a tuple of data references:
3564 insert an equality for representing the conflicts. */
3565
3566 static bool
3567 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3568 struct data_dependence_relation *ddr,
3569 omega_pb pb, bool *maybe_dependent)
3570 {
3571 int eq;
3572 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3573 TREE_TYPE (access_fun_b));
3574 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3575 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3576 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3577
3578 /* When the fun_a - fun_b is not constant, the dependence is not
3579 captured by the classic distance vector representation. */
3580 if (TREE_CODE (difference) != INTEGER_CST)
3581 return false;
3582
3583 /* ZIV test. */
3584 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3585 {
3586 /* There is no dependence. */
3587 *maybe_dependent = false;
3588 return true;
3589 }
3590
3591 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3592
3593 eq = omega_add_zero_eq (pb, omega_black);
3594 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3595 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3596 /* There is probably a dependence, but the system of
3597 constraints cannot be built: answer "don't know". */
3598 return false;
3599
3600 /* GCD test. */
3601 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3602 && !int_divides_p (lambda_vector_gcd
3603 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3604 2 * DDR_NB_LOOPS (ddr)),
3605 pb->eqs[eq].coef[0]))
3606 {
3607 /* There is no dependence. */
3608 *maybe_dependent = false;
3609 return true;
3610 }
3611
3612 return true;
3613 }
3614
3615 /* Helper function, same as init_omega_for_ddr but specialized for
3616 data references A and B. */
3617
3618 static bool
3619 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3620 struct data_dependence_relation *ddr,
3621 omega_pb pb, bool *maybe_dependent)
3622 {
3623 unsigned i;
3624 int ineq;
3625 struct loop *loopi;
3626 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3627
3628 /* Insert an equality per subscript. */
3629 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3630 {
3631 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3632 ddr, pb, maybe_dependent))
3633 return false;
3634 else if (*maybe_dependent == false)
3635 {
3636 /* There is no dependence. */
3637 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3638 return true;
3639 }
3640 }
3641
3642 /* Insert inequalities: constraints corresponding to the iteration
3643 domain, i.e. the loops surrounding the references "loop_x" and
3644 the distance variables "dx". The layout of the OMEGA
3645 representation is as follows:
3646 - coef[0] is the constant
3647 - coef[1..nb_loops] are the protected variables that will not be
3648 removed by the solver: the "dx"
3649 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3650 */
3651 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3652 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3653 {
3654 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3655
3656 /* 0 <= loop_x */
3657 ineq = omega_add_zero_geq (pb, omega_black);
3658 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3659
3660 /* 0 <= loop_x + dx */
3661 ineq = omega_add_zero_geq (pb, omega_black);
3662 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3663 pb->geqs[ineq].coef[i + 1] = 1;
3664
3665 if (nbi != -1)
3666 {
3667 /* loop_x <= nb_iters */
3668 ineq = omega_add_zero_geq (pb, omega_black);
3669 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3670 pb->geqs[ineq].coef[0] = nbi;
3671
3672 /* loop_x + dx <= nb_iters */
3673 ineq = omega_add_zero_geq (pb, omega_black);
3674 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3675 pb->geqs[ineq].coef[i + 1] = -1;
3676 pb->geqs[ineq].coef[0] = nbi;
3677
3678 /* A step "dx" bigger than nb_iters is not feasible, so
3679 add "0 <= nb_iters + dx", */
3680 ineq = omega_add_zero_geq (pb, omega_black);
3681 pb->geqs[ineq].coef[i + 1] = 1;
3682 pb->geqs[ineq].coef[0] = nbi;
3683 /* and "dx <= nb_iters". */
3684 ineq = omega_add_zero_geq (pb, omega_black);
3685 pb->geqs[ineq].coef[i + 1] = -1;
3686 pb->geqs[ineq].coef[0] = nbi;
3687 }
3688 }
3689
3690 omega_extract_distance_vectors (pb, ddr);
3691
3692 return true;
3693 }
3694
3695 /* Sets up the Omega dependence problem for the data dependence
3696 relation DDR. Returns false when the constraint system cannot be
3697 built, ie. when the test answers "don't know". Returns true
3698 otherwise, and when independence has been proved (using one of the
3699 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3700 set MAYBE_DEPENDENT to true.
3701
3702 Example: for setting up the dependence system corresponding to the
3703 conflicting accesses
3704
3705 | loop_i
3706 | loop_j
3707 | A[i, i+1] = ...
3708 | ... A[2*j, 2*(i + j)]
3709 | endloop_j
3710 | endloop_i
3711
3712 the following constraints come from the iteration domain:
3713
3714 0 <= i <= Ni
3715 0 <= i + di <= Ni
3716 0 <= j <= Nj
3717 0 <= j + dj <= Nj
3718
3719 where di, dj are the distance variables. The constraints
3720 representing the conflicting elements are:
3721
3722 i = 2 * (j + dj)
3723 i + 1 = 2 * (i + di + j + dj)
3724
3725 For asking that the resulting distance vector (di, dj) be
3726 lexicographically positive, we insert the constraint "di >= 0". If
3727 "di = 0" in the solution, we fix that component to zero, and we
3728 look at the inner loops: we set a new problem where all the outer
3729 loop distances are zero, and fix this inner component to be
3730 positive. When one of the components is positive, we save that
3731 distance, and set a new problem where the distance on this loop is
3732 zero, searching for other distances in the inner loops. Here is
3733 the classic example that illustrates that we have to set for each
3734 inner loop a new problem:
3735
3736 | loop_1
3737 | loop_2
3738 | A[10]
3739 | endloop_2
3740 | endloop_1
3741
3742 we have to save two distances (1, 0) and (0, 1).
3743
3744 Given two array references, refA and refB, we have to set the
3745 dependence problem twice, refA vs. refB and refB vs. refA, and we
3746 cannot do a single test, as refB might occur before refA in the
3747 inner loops, and the contrary when considering outer loops: ex.
3748
3749 | loop_0
3750 | loop_1
3751 | loop_2
3752 | T[{1,+,1}_2][{1,+,1}_1] // refA
3753 | T[{2,+,1}_2][{0,+,1}_1] // refB
3754 | endloop_2
3755 | endloop_1
3756 | endloop_0
3757
3758 refB touches the elements in T before refA, and thus for the same
3759 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3760 but for successive loop_0 iterations, we have (1, -1, 1)
3761
3762 The Omega solver expects the distance variables ("di" in the
3763 previous example) to come first in the constraint system (as
3764 variables to be protected, or "safe" variables), the constraint
3765 system is built using the following layout:
3766
3767 "cst | distance vars | index vars".
3768 */
3769
3770 static bool
3771 init_omega_for_ddr (struct data_dependence_relation *ddr,
3772 bool *maybe_dependent)
3773 {
3774 omega_pb pb;
3775 bool res = false;
3776
3777 *maybe_dependent = true;
3778
3779 if (same_access_functions (ddr))
3780 {
3781 unsigned j;
3782 lambda_vector dir_v;
3783
3784 /* Save the 0 vector. */
3785 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3786 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3787 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3788 dir_v[j] = dir_equal;
3789 save_dir_v (ddr, dir_v);
3790
3791 /* Save the dependences carried by outer loops. */
3792 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3793 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3794 maybe_dependent);
3795 omega_free_problem (pb);
3796 return res;
3797 }
3798
3799 /* Omega expects the protected variables (those that have to be kept
3800 after elimination) to appear first in the constraint system.
3801 These variables are the distance variables. In the following
3802 initialization we declare NB_LOOPS safe variables, and the total
3803 number of variables for the constraint system is 2*NB_LOOPS. */
3804 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3805 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3806 maybe_dependent);
3807 omega_free_problem (pb);
3808
3809 /* Stop computation if not decidable, or no dependence. */
3810 if (res == false || *maybe_dependent == false)
3811 return res;
3812
3813 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3814 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3815 maybe_dependent);
3816 omega_free_problem (pb);
3817
3818 return res;
3819 }
3820
3821 /* Return true when DDR contains the same information as that stored
3822 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3823
3824 static bool
3825 ddr_consistent_p (FILE *file,
3826 struct data_dependence_relation *ddr,
3827 VEC (lambda_vector, heap) *dist_vects,
3828 VEC (lambda_vector, heap) *dir_vects)
3829 {
3830 unsigned int i, j;
3831
3832 /* If dump_file is set, output there. */
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3834 file = dump_file;
3835
3836 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3837 {
3838 lambda_vector b_dist_v;
3839 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3840 VEC_length (lambda_vector, dist_vects),
3841 DDR_NUM_DIST_VECTS (ddr));
3842
3843 fprintf (file, "Banerjee dist vectors:\n");
3844 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3845 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3846
3847 fprintf (file, "Omega dist vectors:\n");
3848 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3849 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3850
3851 fprintf (file, "data dependence relation:\n");
3852 dump_data_dependence_relation (file, ddr);
3853
3854 fprintf (file, ")\n");
3855 return false;
3856 }
3857
3858 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3859 {
3860 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3861 VEC_length (lambda_vector, dir_vects),
3862 DDR_NUM_DIR_VECTS (ddr));
3863 return false;
3864 }
3865
3866 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3867 {
3868 lambda_vector a_dist_v;
3869 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3870
3871 /* Distance vectors are not ordered in the same way in the DDR
3872 and in the DIST_VECTS: search for a matching vector. */
3873 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3874 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3875 break;
3876
3877 if (j == VEC_length (lambda_vector, dist_vects))
3878 {
3879 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3880 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3881 fprintf (file, "not found in Omega dist vectors:\n");
3882 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3883 fprintf (file, "data dependence relation:\n");
3884 dump_data_dependence_relation (file, ddr);
3885 fprintf (file, ")\n");
3886 }
3887 }
3888
3889 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3890 {
3891 lambda_vector a_dir_v;
3892 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3893
3894 /* Direction vectors are not ordered in the same way in the DDR
3895 and in the DIR_VECTS: search for a matching vector. */
3896 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3897 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3898 break;
3899
3900 if (j == VEC_length (lambda_vector, dist_vects))
3901 {
3902 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3903 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3904 fprintf (file, "not found in Omega dir vectors:\n");
3905 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3906 fprintf (file, "data dependence relation:\n");
3907 dump_data_dependence_relation (file, ddr);
3908 fprintf (file, ")\n");
3909 }
3910 }
3911
3912 return true;
3913 }
3914
3915 /* This computes the affine dependence relation between A and B with
3916 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3917 independence between two accesses, while CHREC_DONT_KNOW is used
3918 for representing the unknown relation.
3919
3920 Note that it is possible to stop the computation of the dependence
3921 relation the first time we detect a CHREC_KNOWN element for a given
3922 subscript. */
3923
3924 static void
3925 compute_affine_dependence (struct data_dependence_relation *ddr,
3926 struct loop *loop_nest)
3927 {
3928 struct data_reference *dra = DDR_A (ddr);
3929 struct data_reference *drb = DDR_B (ddr);
3930
3931 if (dump_file && (dump_flags & TDF_DETAILS))
3932 {
3933 fprintf (dump_file, "(compute_affine_dependence\n");
3934 fprintf (dump_file, " (stmt_a = \n");
3935 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3936 fprintf (dump_file, ")\n (stmt_b = \n");
3937 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3938 fprintf (dump_file, ")\n");
3939 }
3940
3941 /* Analyze only when the dependence relation is not yet known. */
3942 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3943 && !DDR_SELF_REFERENCE (ddr))
3944 {
3945 dependence_stats.num_dependence_tests++;
3946
3947 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3948 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3949 {
3950 if (flag_check_data_deps)
3951 {
3952 /* Compute the dependences using the first algorithm. */
3953 subscript_dependence_tester (ddr, loop_nest);
3954
3955 if (dump_file && (dump_flags & TDF_DETAILS))
3956 {
3957 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3958 dump_data_dependence_relation (dump_file, ddr);
3959 }
3960
3961 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3962 {
3963 bool maybe_dependent;
3964 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3965
3966 /* Save the result of the first DD analyzer. */
3967 dist_vects = DDR_DIST_VECTS (ddr);
3968 dir_vects = DDR_DIR_VECTS (ddr);
3969
3970 /* Reset the information. */
3971 DDR_DIST_VECTS (ddr) = NULL;
3972 DDR_DIR_VECTS (ddr) = NULL;
3973
3974 /* Compute the same information using Omega. */
3975 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3976 goto csys_dont_know;
3977
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3979 {
3980 fprintf (dump_file, "Omega Analyzer\n");
3981 dump_data_dependence_relation (dump_file, ddr);
3982 }
3983
3984 /* Check that we get the same information. */
3985 if (maybe_dependent)
3986 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3987 dir_vects));
3988 }
3989 }
3990 else
3991 subscript_dependence_tester (ddr, loop_nest);
3992 }
3993
3994 /* As a last case, if the dependence cannot be determined, or if
3995 the dependence is considered too difficult to determine, answer
3996 "don't know". */
3997 else
3998 {
3999 csys_dont_know:;
4000 dependence_stats.num_dependence_undetermined++;
4001
4002 if (dump_file && (dump_flags & TDF_DETAILS))
4003 {
4004 fprintf (dump_file, "Data ref a:\n");
4005 dump_data_reference (dump_file, dra);
4006 fprintf (dump_file, "Data ref b:\n");
4007 dump_data_reference (dump_file, drb);
4008 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4009 }
4010 finalize_ddr_dependent (ddr, chrec_dont_know);
4011 }
4012 }
4013
4014 if (dump_file && (dump_flags & TDF_DETAILS))
4015 fprintf (dump_file, ")\n");
4016 }
4017
4018 /* This computes the dependence relation for the same data
4019 reference into DDR. */
4020
4021 static void
4022 compute_self_dependence (struct data_dependence_relation *ddr)
4023 {
4024 unsigned int i;
4025 struct subscript *subscript;
4026
4027 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4028 return;
4029
4030 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4031 i++)
4032 {
4033 if (SUB_CONFLICTS_IN_A (subscript))
4034 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4035 if (SUB_CONFLICTS_IN_B (subscript))
4036 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4037
4038 /* The accessed index overlaps for each iteration. */
4039 SUB_CONFLICTS_IN_A (subscript)
4040 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4041 SUB_CONFLICTS_IN_B (subscript)
4042 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4043 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4044 }
4045
4046 /* The distance vector is the zero vector. */
4047 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4048 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4049 }
4050
4051 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4052 the data references in DATAREFS, in the LOOP_NEST. When
4053 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4054 relations. */
4055
4056 void
4057 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4058 VEC (ddr_p, heap) **dependence_relations,
4059 VEC (loop_p, heap) *loop_nest,
4060 bool compute_self_and_rr)
4061 {
4062 struct data_dependence_relation *ddr;
4063 struct data_reference *a, *b;
4064 unsigned int i, j;
4065
4066 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4067 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4068 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
4069 {
4070 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4071 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4072 if (loop_nest)
4073 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4074 }
4075
4076 if (compute_self_and_rr)
4077 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4078 {
4079 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4080 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4081 compute_self_dependence (ddr);
4082 }
4083 }
4084
4085 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4086 true if STMT clobbers memory, false otherwise. */
4087
4088 bool
4089 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4090 {
4091 bool clobbers_memory = false;
4092 data_ref_loc *ref;
4093 tree *op0, *op1;
4094 enum gimple_code stmt_code = gimple_code (stmt);
4095
4096 *references = NULL;
4097
4098 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4099 Calls have side-effects, except those to const or pure
4100 functions. */
4101 if ((stmt_code == GIMPLE_CALL
4102 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4103 || (stmt_code == GIMPLE_ASM
4104 && gimple_asm_volatile_p (stmt)))
4105 clobbers_memory = true;
4106
4107 if (!gimple_vuse (stmt))
4108 return clobbers_memory;
4109
4110 if (stmt_code == GIMPLE_ASSIGN)
4111 {
4112 tree base;
4113 op0 = gimple_assign_lhs_ptr (stmt);
4114 op1 = gimple_assign_rhs1_ptr (stmt);
4115
4116 if (DECL_P (*op1)
4117 || (REFERENCE_CLASS_P (*op1)
4118 && (base = get_base_address (*op1))
4119 && TREE_CODE (base) != SSA_NAME))
4120 {
4121 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4122 ref->pos = op1;
4123 ref->is_read = true;
4124 }
4125
4126 if (DECL_P (*op0)
4127 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4128 {
4129 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4130 ref->pos = op0;
4131 ref->is_read = false;
4132 }
4133 }
4134 else if (stmt_code == GIMPLE_CALL)
4135 {
4136 unsigned i, n = gimple_call_num_args (stmt);
4137
4138 for (i = 0; i < n; i++)
4139 {
4140 op0 = gimple_call_arg_ptr (stmt, i);
4141
4142 if (DECL_P (*op0)
4143 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4144 {
4145 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4146 ref->pos = op0;
4147 ref->is_read = true;
4148 }
4149 }
4150 }
4151
4152 return clobbers_memory;
4153 }
4154
4155 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4156 reference, returns false, otherwise returns true. NEST is the outermost
4157 loop of the loop nest in which the references should be analyzed. */
4158
4159 bool
4160 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4161 VEC (data_reference_p, heap) **datarefs)
4162 {
4163 unsigned i;
4164 VEC (data_ref_loc, heap) *references;
4165 data_ref_loc *ref;
4166 bool ret = true;
4167 data_reference_p dr;
4168
4169 if (get_references_in_stmt (stmt, &references))
4170 {
4171 VEC_free (data_ref_loc, heap, references);
4172 return false;
4173 }
4174
4175 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4176 {
4177 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4178 gcc_assert (dr != NULL);
4179
4180 /* FIXME -- data dependence analysis does not work correctly for objects
4181 with invariant addresses in loop nests. Let us fail here until the
4182 problem is fixed. */
4183 if (dr_address_invariant_p (dr) && nest)
4184 {
4185 free_data_ref (dr);
4186 if (dump_file && (dump_flags & TDF_DETAILS))
4187 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4188 ret = false;
4189 break;
4190 }
4191
4192 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4193 }
4194 VEC_free (data_ref_loc, heap, references);
4195 return ret;
4196 }
4197
4198 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4199 reference, returns false, otherwise returns true. NEST is the outermost
4200 loop of the loop nest in which the references should be analyzed. */
4201
4202 bool
4203 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4204 VEC (data_reference_p, heap) **datarefs)
4205 {
4206 unsigned i;
4207 VEC (data_ref_loc, heap) *references;
4208 data_ref_loc *ref;
4209 bool ret = true;
4210 data_reference_p dr;
4211
4212 if (get_references_in_stmt (stmt, &references))
4213 {
4214 VEC_free (data_ref_loc, heap, references);
4215 return false;
4216 }
4217
4218 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4219 {
4220 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4221 gcc_assert (dr != NULL);
4222 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4223 }
4224
4225 VEC_free (data_ref_loc, heap, references);
4226 return ret;
4227 }
4228
4229 /* Search the data references in LOOP, and record the information into
4230 DATAREFS. Returns chrec_dont_know when failing to analyze a
4231 difficult case, returns NULL_TREE otherwise. */
4232
4233 static tree
4234 find_data_references_in_bb (struct loop *loop, basic_block bb,
4235 VEC (data_reference_p, heap) **datarefs)
4236 {
4237 gimple_stmt_iterator bsi;
4238
4239 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4240 {
4241 gimple stmt = gsi_stmt (bsi);
4242
4243 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4244 {
4245 struct data_reference *res;
4246 res = XCNEW (struct data_reference);
4247 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4248
4249 return chrec_dont_know;
4250 }
4251 }
4252
4253 return NULL_TREE;
4254 }
4255
4256 /* Search the data references in LOOP, and record the information into
4257 DATAREFS. Returns chrec_dont_know when failing to analyze a
4258 difficult case, returns NULL_TREE otherwise.
4259
4260 TODO: This function should be made smarter so that it can handle address
4261 arithmetic as if they were array accesses, etc. */
4262
4263 tree
4264 find_data_references_in_loop (struct loop *loop,
4265 VEC (data_reference_p, heap) **datarefs)
4266 {
4267 basic_block bb, *bbs;
4268 unsigned int i;
4269
4270 bbs = get_loop_body_in_dom_order (loop);
4271
4272 for (i = 0; i < loop->num_nodes; i++)
4273 {
4274 bb = bbs[i];
4275
4276 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4277 {
4278 free (bbs);
4279 return chrec_dont_know;
4280 }
4281 }
4282 free (bbs);
4283
4284 return NULL_TREE;
4285 }
4286
4287 /* Recursive helper function. */
4288
4289 static bool
4290 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4291 {
4292 /* Inner loops of the nest should not contain siblings. Example:
4293 when there are two consecutive loops,
4294
4295 | loop_0
4296 | loop_1
4297 | A[{0, +, 1}_1]
4298 | endloop_1
4299 | loop_2
4300 | A[{0, +, 1}_2]
4301 | endloop_2
4302 | endloop_0
4303
4304 the dependence relation cannot be captured by the distance
4305 abstraction. */
4306 if (loop->next)
4307 return false;
4308
4309 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4310 if (loop->inner)
4311 return find_loop_nest_1 (loop->inner, loop_nest);
4312 return true;
4313 }
4314
4315 /* Return false when the LOOP is not well nested. Otherwise return
4316 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4317 contain the loops from the outermost to the innermost, as they will
4318 appear in the classic distance vector. */
4319
4320 bool
4321 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4322 {
4323 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4324 if (loop->inner)
4325 return find_loop_nest_1 (loop->inner, loop_nest);
4326 return true;
4327 }
4328
4329 /* Returns true when the data dependences have been computed, false otherwise.
4330 Given a loop nest LOOP, the following vectors are returned:
4331 DATAREFS is initialized to all the array elements contained in this loop,
4332 DEPENDENCE_RELATIONS contains the relations between the data references.
4333 Compute read-read and self relations if
4334 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4335
4336 bool
4337 compute_data_dependences_for_loop (struct loop *loop,
4338 bool compute_self_and_read_read_dependences,
4339 VEC (data_reference_p, heap) **datarefs,
4340 VEC (ddr_p, heap) **dependence_relations)
4341 {
4342 bool res = true;
4343 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4344
4345 memset (&dependence_stats, 0, sizeof (dependence_stats));
4346
4347 /* If the loop nest is not well formed, or one of the data references
4348 is not computable, give up without spending time to compute other
4349 dependences. */
4350 if (!loop
4351 || !find_loop_nest (loop, &vloops)
4352 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4353 {
4354 struct data_dependence_relation *ddr;
4355
4356 /* Insert a single relation into dependence_relations:
4357 chrec_dont_know. */
4358 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4359 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4360 res = false;
4361 }
4362 else
4363 compute_all_dependences (*datarefs, dependence_relations, vloops,
4364 compute_self_and_read_read_dependences);
4365
4366 if (dump_file && (dump_flags & TDF_STATS))
4367 {
4368 fprintf (dump_file, "Dependence tester statistics:\n");
4369
4370 fprintf (dump_file, "Number of dependence tests: %d\n",
4371 dependence_stats.num_dependence_tests);
4372 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4373 dependence_stats.num_dependence_dependent);
4374 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4375 dependence_stats.num_dependence_independent);
4376 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4377 dependence_stats.num_dependence_undetermined);
4378
4379 fprintf (dump_file, "Number of subscript tests: %d\n",
4380 dependence_stats.num_subscript_tests);
4381 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4382 dependence_stats.num_subscript_undetermined);
4383 fprintf (dump_file, "Number of same subscript function: %d\n",
4384 dependence_stats.num_same_subscript_function);
4385
4386 fprintf (dump_file, "Number of ziv tests: %d\n",
4387 dependence_stats.num_ziv);
4388 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4389 dependence_stats.num_ziv_dependent);
4390 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4391 dependence_stats.num_ziv_independent);
4392 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4393 dependence_stats.num_ziv_unimplemented);
4394
4395 fprintf (dump_file, "Number of siv tests: %d\n",
4396 dependence_stats.num_siv);
4397 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4398 dependence_stats.num_siv_dependent);
4399 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4400 dependence_stats.num_siv_independent);
4401 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4402 dependence_stats.num_siv_unimplemented);
4403
4404 fprintf (dump_file, "Number of miv tests: %d\n",
4405 dependence_stats.num_miv);
4406 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4407 dependence_stats.num_miv_dependent);
4408 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4409 dependence_stats.num_miv_independent);
4410 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4411 dependence_stats.num_miv_unimplemented);
4412 }
4413
4414 return res;
4415 }
4416
4417 /* Returns true when the data dependences for the basic block BB have been
4418 computed, false otherwise.
4419 DATAREFS is initialized to all the array elements contained in this basic
4420 block, DEPENDENCE_RELATIONS contains the relations between the data
4421 references. Compute read-read and self relations if
4422 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4423 bool
4424 compute_data_dependences_for_bb (basic_block bb,
4425 bool compute_self_and_read_read_dependences,
4426 VEC (data_reference_p, heap) **datarefs,
4427 VEC (ddr_p, heap) **dependence_relations)
4428 {
4429 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4430 return false;
4431
4432 compute_all_dependences (*datarefs, dependence_relations, NULL,
4433 compute_self_and_read_read_dependences);
4434 return true;
4435 }
4436
4437 /* Entry point (for testing only). Analyze all the data references
4438 and the dependence relations in LOOP.
4439
4440 The data references are computed first.
4441
4442 A relation on these nodes is represented by a complete graph. Some
4443 of the relations could be of no interest, thus the relations can be
4444 computed on demand.
4445
4446 In the following function we compute all the relations. This is
4447 just a first implementation that is here for:
4448 - for showing how to ask for the dependence relations,
4449 - for the debugging the whole dependence graph,
4450 - for the dejagnu testcases and maintenance.
4451
4452 It is possible to ask only for a part of the graph, avoiding to
4453 compute the whole dependence graph. The computed dependences are
4454 stored in a knowledge base (KB) such that later queries don't
4455 recompute the same information. The implementation of this KB is
4456 transparent to the optimizer, and thus the KB can be changed with a
4457 more efficient implementation, or the KB could be disabled. */
4458 static void
4459 analyze_all_data_dependences (struct loop *loop)
4460 {
4461 unsigned int i;
4462 int nb_data_refs = 10;
4463 VEC (data_reference_p, heap) *datarefs =
4464 VEC_alloc (data_reference_p, heap, nb_data_refs);
4465 VEC (ddr_p, heap) *dependence_relations =
4466 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4467
4468 /* Compute DDs on the whole function. */
4469 compute_data_dependences_for_loop (loop, false, &datarefs,
4470 &dependence_relations);
4471
4472 if (dump_file)
4473 {
4474 dump_data_dependence_relations (dump_file, dependence_relations);
4475 fprintf (dump_file, "\n\n");
4476
4477 if (dump_flags & TDF_DETAILS)
4478 dump_dist_dir_vectors (dump_file, dependence_relations);
4479
4480 if (dump_flags & TDF_STATS)
4481 {
4482 unsigned nb_top_relations = 0;
4483 unsigned nb_bot_relations = 0;
4484 unsigned nb_chrec_relations = 0;
4485 struct data_dependence_relation *ddr;
4486
4487 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4488 {
4489 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4490 nb_top_relations++;
4491
4492 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4493 nb_bot_relations++;
4494
4495 else
4496 nb_chrec_relations++;
4497 }
4498
4499 gather_stats_on_scev_database ();
4500 }
4501 }
4502
4503 free_dependence_relations (dependence_relations);
4504 free_data_refs (datarefs);
4505 }
4506
4507 /* Computes all the data dependences and check that the results of
4508 several analyzers are the same. */
4509
4510 void
4511 tree_check_data_deps (void)
4512 {
4513 loop_iterator li;
4514 struct loop *loop_nest;
4515
4516 FOR_EACH_LOOP (li, loop_nest, 0)
4517 analyze_all_data_dependences (loop_nest);
4518 }
4519
4520 /* Free the memory used by a data dependence relation DDR. */
4521
4522 void
4523 free_dependence_relation (struct data_dependence_relation *ddr)
4524 {
4525 if (ddr == NULL)
4526 return;
4527
4528 if (DDR_SUBSCRIPTS (ddr))
4529 free_subscripts (DDR_SUBSCRIPTS (ddr));
4530 if (DDR_DIST_VECTS (ddr))
4531 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4532 if (DDR_DIR_VECTS (ddr))
4533 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4534
4535 free (ddr);
4536 }
4537
4538 /* Free the memory used by the data dependence relations from
4539 DEPENDENCE_RELATIONS. */
4540
4541 void
4542 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4543 {
4544 unsigned int i;
4545 struct data_dependence_relation *ddr;
4546 VEC (loop_p, heap) *loop_nest = NULL;
4547
4548 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4549 {
4550 if (ddr == NULL)
4551 continue;
4552 if (loop_nest == NULL)
4553 loop_nest = DDR_LOOP_NEST (ddr);
4554 else
4555 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4556 || DDR_LOOP_NEST (ddr) == loop_nest);
4557 free_dependence_relation (ddr);
4558 }
4559
4560 if (loop_nest)
4561 VEC_free (loop_p, heap, loop_nest);
4562 VEC_free (ddr_p, heap, dependence_relations);
4563 }
4564
4565 /* Free the memory used by the data references from DATAREFS. */
4566
4567 void
4568 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4569 {
4570 unsigned int i;
4571 struct data_reference *dr;
4572
4573 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4574 free_data_ref (dr);
4575 VEC_free (data_reference_p, heap, datarefs);
4576 }
4577
4578 \f
4579
4580 /* Dump vertex I in RDG to FILE. */
4581
4582 void
4583 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4584 {
4585 struct vertex *v = &(rdg->vertices[i]);
4586 struct graph_edge *e;
4587
4588 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4589 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4590 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4591
4592 if (v->pred)
4593 for (e = v->pred; e; e = e->pred_next)
4594 fprintf (file, " %d", e->src);
4595
4596 fprintf (file, ") (out:");
4597
4598 if (v->succ)
4599 for (e = v->succ; e; e = e->succ_next)
4600 fprintf (file, " %d", e->dest);
4601
4602 fprintf (file, ") \n");
4603 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4604 fprintf (file, ")\n");
4605 }
4606
4607 /* Call dump_rdg_vertex on stderr. */
4608
4609 void
4610 debug_rdg_vertex (struct graph *rdg, int i)
4611 {
4612 dump_rdg_vertex (stderr, rdg, i);
4613 }
4614
4615 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4616 dumped vertices to that bitmap. */
4617
4618 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4619 {
4620 int i;
4621
4622 fprintf (file, "(%d\n", c);
4623
4624 for (i = 0; i < rdg->n_vertices; i++)
4625 if (rdg->vertices[i].component == c)
4626 {
4627 if (dumped)
4628 bitmap_set_bit (dumped, i);
4629
4630 dump_rdg_vertex (file, rdg, i);
4631 }
4632
4633 fprintf (file, ")\n");
4634 }
4635
4636 /* Call dump_rdg_vertex on stderr. */
4637
4638 void
4639 debug_rdg_component (struct graph *rdg, int c)
4640 {
4641 dump_rdg_component (stderr, rdg, c, NULL);
4642 }
4643
4644 /* Dump the reduced dependence graph RDG to FILE. */
4645
4646 void
4647 dump_rdg (FILE *file, struct graph *rdg)
4648 {
4649 int i;
4650 bitmap dumped = BITMAP_ALLOC (NULL);
4651
4652 fprintf (file, "(rdg\n");
4653
4654 for (i = 0; i < rdg->n_vertices; i++)
4655 if (!bitmap_bit_p (dumped, i))
4656 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4657
4658 fprintf (file, ")\n");
4659 BITMAP_FREE (dumped);
4660 }
4661
4662 /* Call dump_rdg on stderr. */
4663
4664 void
4665 debug_rdg (struct graph *rdg)
4666 {
4667 dump_rdg (stderr, rdg);
4668 }
4669
4670 /* This structure is used for recording the mapping statement index in
4671 the RDG. */
4672
4673 struct GTY(()) rdg_vertex_info
4674 {
4675 gimple stmt;
4676 int index;
4677 };
4678
4679 /* Returns the index of STMT in RDG. */
4680
4681 int
4682 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4683 {
4684 struct rdg_vertex_info rvi, *slot;
4685
4686 rvi.stmt = stmt;
4687 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4688
4689 if (!slot)
4690 return -1;
4691
4692 return slot->index;
4693 }
4694
4695 /* Creates an edge in RDG for each distance vector from DDR. The
4696 order that we keep track of in the RDG is the order in which
4697 statements have to be executed. */
4698
4699 static void
4700 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4701 {
4702 struct graph_edge *e;
4703 int va, vb;
4704 data_reference_p dra = DDR_A (ddr);
4705 data_reference_p drb = DDR_B (ddr);
4706 unsigned level = ddr_dependence_level (ddr);
4707
4708 /* For non scalar dependences, when the dependence is REVERSED,
4709 statement B has to be executed before statement A. */
4710 if (level > 0
4711 && !DDR_REVERSED_P (ddr))
4712 {
4713 data_reference_p tmp = dra;
4714 dra = drb;
4715 drb = tmp;
4716 }
4717
4718 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4719 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4720
4721 if (va < 0 || vb < 0)
4722 return;
4723
4724 e = add_edge (rdg, va, vb);
4725 e->data = XNEW (struct rdg_edge);
4726
4727 RDGE_LEVEL (e) = level;
4728 RDGE_RELATION (e) = ddr;
4729
4730 /* Determines the type of the data dependence. */
4731 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4732 RDGE_TYPE (e) = input_dd;
4733 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4734 RDGE_TYPE (e) = output_dd;
4735 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4736 RDGE_TYPE (e) = flow_dd;
4737 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4738 RDGE_TYPE (e) = anti_dd;
4739 }
4740
4741 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4742 the index of DEF in RDG. */
4743
4744 static void
4745 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4746 {
4747 use_operand_p imm_use_p;
4748 imm_use_iterator iterator;
4749
4750 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4751 {
4752 struct graph_edge *e;
4753 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4754
4755 if (use < 0)
4756 continue;
4757
4758 e = add_edge (rdg, idef, use);
4759 e->data = XNEW (struct rdg_edge);
4760 RDGE_TYPE (e) = flow_dd;
4761 RDGE_RELATION (e) = NULL;
4762 }
4763 }
4764
4765 /* Creates the edges of the reduced dependence graph RDG. */
4766
4767 static void
4768 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4769 {
4770 int i;
4771 struct data_dependence_relation *ddr;
4772 def_operand_p def_p;
4773 ssa_op_iter iter;
4774
4775 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4776 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4777 create_rdg_edge_for_ddr (rdg, ddr);
4778
4779 for (i = 0; i < rdg->n_vertices; i++)
4780 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4781 iter, SSA_OP_DEF)
4782 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4783 }
4784
4785 /* Build the vertices of the reduced dependence graph RDG. */
4786
4787 void
4788 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4789 {
4790 int i, j;
4791 gimple stmt;
4792
4793 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4794 {
4795 VEC (data_ref_loc, heap) *references;
4796 data_ref_loc *ref;
4797 struct vertex *v = &(rdg->vertices[i]);
4798 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4799 struct rdg_vertex_info **slot;
4800
4801 rvi->stmt = stmt;
4802 rvi->index = i;
4803 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4804
4805 if (!*slot)
4806 *slot = rvi;
4807 else
4808 free (rvi);
4809
4810 v->data = XNEW (struct rdg_vertex);
4811 RDG_STMT (rdg, i) = stmt;
4812
4813 RDG_MEM_WRITE_STMT (rdg, i) = false;
4814 RDG_MEM_READS_STMT (rdg, i) = false;
4815 if (gimple_code (stmt) == GIMPLE_PHI)
4816 continue;
4817
4818 get_references_in_stmt (stmt, &references);
4819 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4820 if (!ref->is_read)
4821 RDG_MEM_WRITE_STMT (rdg, i) = true;
4822 else
4823 RDG_MEM_READS_STMT (rdg, i) = true;
4824
4825 VEC_free (data_ref_loc, heap, references);
4826 }
4827 }
4828
4829 /* Initialize STMTS with all the statements of LOOP. When
4830 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4831 which we discover statements is important as
4832 generate_loops_for_partition is using the same traversal for
4833 identifying statements. */
4834
4835 static void
4836 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4837 {
4838 unsigned int i;
4839 basic_block *bbs = get_loop_body_in_dom_order (loop);
4840
4841 for (i = 0; i < loop->num_nodes; i++)
4842 {
4843 basic_block bb = bbs[i];
4844 gimple_stmt_iterator bsi;
4845 gimple stmt;
4846
4847 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4848 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4849
4850 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4851 {
4852 stmt = gsi_stmt (bsi);
4853 if (gimple_code (stmt) != GIMPLE_LABEL)
4854 VEC_safe_push (gimple, heap, *stmts, stmt);
4855 }
4856 }
4857
4858 free (bbs);
4859 }
4860
4861 /* Returns true when all the dependences are computable. */
4862
4863 static bool
4864 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4865 {
4866 ddr_p ddr;
4867 unsigned int i;
4868
4869 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4870 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4871 return false;
4872
4873 return true;
4874 }
4875
4876 /* Computes a hash function for element ELT. */
4877
4878 static hashval_t
4879 hash_stmt_vertex_info (const void *elt)
4880 {
4881 const struct rdg_vertex_info *const rvi =
4882 (const struct rdg_vertex_info *) elt;
4883 gimple stmt = rvi->stmt;
4884
4885 return htab_hash_pointer (stmt);
4886 }
4887
4888 /* Compares database elements E1 and E2. */
4889
4890 static int
4891 eq_stmt_vertex_info (const void *e1, const void *e2)
4892 {
4893 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4894 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4895
4896 return elt1->stmt == elt2->stmt;
4897 }
4898
4899 /* Free the element E. */
4900
4901 static void
4902 hash_stmt_vertex_del (void *e)
4903 {
4904 free (e);
4905 }
4906
4907 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4908 statement of the loop nest, and one edge per data dependence or
4909 scalar dependence. */
4910
4911 struct graph *
4912 build_empty_rdg (int n_stmts)
4913 {
4914 int nb_data_refs = 10;
4915 struct graph *rdg = new_graph (n_stmts);
4916
4917 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4918 eq_stmt_vertex_info, hash_stmt_vertex_del);
4919 return rdg;
4920 }
4921
4922 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4923 statement of the loop nest, and one edge per data dependence or
4924 scalar dependence. */
4925
4926 struct graph *
4927 build_rdg (struct loop *loop)
4928 {
4929 int nb_data_refs = 10;
4930 struct graph *rdg = NULL;
4931 VEC (ddr_p, heap) *dependence_relations;
4932 VEC (data_reference_p, heap) *datarefs;
4933 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4934
4935 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4936 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4937 compute_data_dependences_for_loop (loop,
4938 false,
4939 &datarefs,
4940 &dependence_relations);
4941
4942 if (!known_dependences_p (dependence_relations))
4943 {
4944 free_dependence_relations (dependence_relations);
4945 free_data_refs (datarefs);
4946 VEC_free (gimple, heap, stmts);
4947
4948 return rdg;
4949 }
4950
4951 stmts_from_loop (loop, &stmts);
4952 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4953
4954 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4955 eq_stmt_vertex_info, hash_stmt_vertex_del);
4956 create_rdg_vertices (rdg, stmts);
4957 create_rdg_edges (rdg, dependence_relations);
4958
4959 VEC_free (gimple, heap, stmts);
4960 return rdg;
4961 }
4962
4963 /* Free the reduced dependence graph RDG. */
4964
4965 void
4966 free_rdg (struct graph *rdg)
4967 {
4968 int i;
4969
4970 for (i = 0; i < rdg->n_vertices; i++)
4971 free (rdg->vertices[i].data);
4972
4973 htab_delete (rdg->indices);
4974 free_graph (rdg);
4975 }
4976
4977 /* Initialize STMTS with all the statements of LOOP that contain a
4978 store to memory. */
4979
4980 void
4981 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4982 {
4983 unsigned int i;
4984 basic_block *bbs = get_loop_body_in_dom_order (loop);
4985
4986 for (i = 0; i < loop->num_nodes; i++)
4987 {
4988 basic_block bb = bbs[i];
4989 gimple_stmt_iterator bsi;
4990
4991 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4992 if (gimple_vdef (gsi_stmt (bsi)))
4993 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4994 }
4995
4996 free (bbs);
4997 }
4998
4999 /* For a data reference REF, return the declaration of its base
5000 address or NULL_TREE if the base is not determined. */
5001
5002 static inline tree
5003 ref_base_address (gimple stmt, data_ref_loc *ref)
5004 {
5005 tree base = NULL_TREE;
5006 tree base_address;
5007 struct data_reference *dr = XCNEW (struct data_reference);
5008
5009 DR_STMT (dr) = stmt;
5010 DR_REF (dr) = *ref->pos;
5011 dr_analyze_innermost (dr);
5012 base_address = DR_BASE_ADDRESS (dr);
5013
5014 if (!base_address)
5015 goto end;
5016
5017 switch (TREE_CODE (base_address))
5018 {
5019 case ADDR_EXPR:
5020 base = TREE_OPERAND (base_address, 0);
5021 break;
5022
5023 default:
5024 base = base_address;
5025 break;
5026 }
5027
5028 end:
5029 free_data_ref (dr);
5030 return base;
5031 }
5032
5033 /* Determines whether the statement from vertex V of the RDG has a
5034 definition used outside the loop that contains this statement. */
5035
5036 bool
5037 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5038 {
5039 gimple stmt = RDG_STMT (rdg, v);
5040 struct loop *loop = loop_containing_stmt (stmt);
5041 use_operand_p imm_use_p;
5042 imm_use_iterator iterator;
5043 ssa_op_iter it;
5044 def_operand_p def_p;
5045
5046 if (!loop)
5047 return true;
5048
5049 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5050 {
5051 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5052 {
5053 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5054 return true;
5055 }
5056 }
5057
5058 return false;
5059 }
5060
5061 /* Determines whether statements S1 and S2 access to similar memory
5062 locations. Two memory accesses are considered similar when they
5063 have the same base address declaration, i.e. when their
5064 ref_base_address is the same. */
5065
5066 bool
5067 have_similar_memory_accesses (gimple s1, gimple s2)
5068 {
5069 bool res = false;
5070 unsigned i, j;
5071 VEC (data_ref_loc, heap) *refs1, *refs2;
5072 data_ref_loc *ref1, *ref2;
5073
5074 get_references_in_stmt (s1, &refs1);
5075 get_references_in_stmt (s2, &refs2);
5076
5077 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
5078 {
5079 tree base1 = ref_base_address (s1, ref1);
5080
5081 if (base1)
5082 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5083 if (base1 == ref_base_address (s2, ref2))
5084 {
5085 res = true;
5086 goto end;
5087 }
5088 }
5089
5090 end:
5091 VEC_free (data_ref_loc, heap, refs1);
5092 VEC_free (data_ref_loc, heap, refs2);
5093 return res;
5094 }
5095
5096 /* Helper function for the hashtab. */
5097
5098 static int
5099 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5100 {
5101 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5102 CONST_CAST_GIMPLE ((const_gimple) s2));
5103 }
5104
5105 /* Helper function for the hashtab. */
5106
5107 static hashval_t
5108 ref_base_address_1 (const void *s)
5109 {
5110 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5111 unsigned i;
5112 VEC (data_ref_loc, heap) *refs;
5113 data_ref_loc *ref;
5114 hashval_t res = 0;
5115
5116 get_references_in_stmt (stmt, &refs);
5117
5118 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5119 if (!ref->is_read)
5120 {
5121 res = htab_hash_pointer (ref_base_address (stmt, ref));
5122 break;
5123 }
5124
5125 VEC_free (data_ref_loc, heap, refs);
5126 return res;
5127 }
5128
5129 /* Try to remove duplicated write data references from STMTS. */
5130
5131 void
5132 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5133 {
5134 unsigned i;
5135 gimple stmt;
5136 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5137 have_similar_memory_accesses_1, NULL);
5138
5139 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5140 {
5141 void **slot;
5142
5143 slot = htab_find_slot (seen, stmt, INSERT);
5144
5145 if (*slot)
5146 VEC_ordered_remove (gimple, *stmts, i);
5147 else
5148 {
5149 *slot = (void *) stmt;
5150 i++;
5151 }
5152 }
5153
5154 htab_delete (seen);
5155 }
5156
5157 /* Returns the index of PARAMETER in the parameters vector of the
5158 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5159
5160 int
5161 access_matrix_get_index_for_parameter (tree parameter,
5162 struct access_matrix *access_matrix)
5163 {
5164 int i;
5165 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5166 tree lambda_parameter;
5167
5168 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5169 if (lambda_parameter == parameter)
5170 return i + AM_NB_INDUCTION_VARS (access_matrix);
5171
5172 return -1;
5173 }