]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
2011-08-24 Richard Guenther <rguenther@suse.de>
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88
89 static struct datadep_stats
90 {
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
95
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
99
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
104
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
109
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
115
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
121
122 static inline bool
123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129
130 /* Returns true iff A divides B. */
131
132 static inline bool
133 int_divides_p (int a, int b)
134 {
135 return ((b % a) == 0);
136 }
137
138 \f
139
140 /* Dump into FILE all the data references from DATAREFS. */
141
142 void
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 {
145 unsigned int i;
146 struct data_reference *dr;
147
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
150 }
151
152 /* Dump into STDERR all the data references from DATAREFS. */
153
154 DEBUG_FUNCTION void
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 {
157 dump_data_references (stderr, datarefs);
158 }
159
160 /* Dump to STDERR all the dependence relations from DDRS. */
161
162 DEBUG_FUNCTION void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 {
165 dump_data_dependence_relations (stderr, ddrs);
166 }
167
168 /* Dump into FILE all the dependence relations from DDRS. */
169
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
173 {
174 unsigned int i;
175 struct data_dependence_relation *ddr;
176
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
179 }
180
181 /* Print to STDERR the data_reference DR. */
182
183 DEBUG_FUNCTION void
184 debug_data_reference (struct data_reference *dr)
185 {
186 dump_data_reference (stderr, dr);
187 }
188
189 /* Dump function for a DATA_REFERENCE structure. */
190
191 void
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
194 {
195 unsigned int i;
196
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 {
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 }
211 fprintf (outf, "#)\n");
212 }
213
214 /* Dumps the affine function described by FN to the file OUTF. */
215
216 static void
217 dump_affine_function (FILE *outf, affine_fn fn)
218 {
219 unsigned i;
220 tree coef;
221
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 {
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
228 }
229 }
230
231 /* Dumps the conflict function CF to the file OUTF. */
232
233 static void
234 dump_conflict_function (FILE *outf, conflict_function *cf)
235 {
236 unsigned i;
237
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
242 else
243 {
244 for (i = 0; i < cf->n; i++)
245 {
246 fprintf (outf, "[");
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
249 }
250 }
251 }
252
253 /* Dump function for a SUBSCRIPT structure. */
254
255 void
256 dump_subscript (FILE *outf, struct subscript *subscript)
257 {
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
264 {
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
268 }
269
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
274 {
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
278 }
279
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
284 }
285
286 /* Print the classic direction vector DIRV to OUTF. */
287
288 void
289 print_direction_vector (FILE *outf,
290 lambda_vector dirv,
291 int length)
292 {
293 int eq;
294
295 for (eq = 0; eq < length; eq++)
296 {
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
298 dirv[eq]);
299
300 switch (dir)
301 {
302 case dir_positive:
303 fprintf (outf, " +");
304 break;
305 case dir_negative:
306 fprintf (outf, " -");
307 break;
308 case dir_equal:
309 fprintf (outf, " =");
310 break;
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
313 break;
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
316 break;
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
319 break;
320 case dir_star:
321 fprintf (outf, " *");
322 break;
323 default:
324 fprintf (outf, "indep");
325 break;
326 }
327 }
328 fprintf (outf, "\n");
329 }
330
331 /* Print a vector of direction vectors. */
332
333 void
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
335 int length)
336 {
337 unsigned j;
338 lambda_vector v;
339
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
342 }
343
344 /* Print out a vector VEC of length N to OUTFILE. */
345
346 static inline void
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 {
349 int i;
350
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
354 }
355
356 /* Print a vector of distance vectors. */
357
358 void
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
360 int length)
361 {
362 unsigned j;
363 lambda_vector v;
364
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
367 }
368
369 /* Debug version. */
370
371 DEBUG_FUNCTION void
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 {
374 dump_data_dependence_relation (stderr, ddr);
375 }
376
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378
379 void
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
382 {
383 struct data_reference *dra, *drb;
384
385 fprintf (outf, "(Data Dep: \n");
386
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 {
389 if (ddr)
390 {
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 if (dra)
394 dump_data_reference (outf, dra);
395 else
396 fprintf (outf, " (nil)\n");
397 if (drb)
398 dump_data_reference (outf, drb);
399 else
400 fprintf (outf, " (nil)\n");
401 }
402 fprintf (outf, " (don't know)\n)\n");
403 return;
404 }
405
406 dra = DDR_A (ddr);
407 drb = DDR_B (ddr);
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
410
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
413
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 {
416 unsigned int i;
417 struct loop *loopi;
418
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 {
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
426 }
427
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
433
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 {
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
439 }
440
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 {
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
445 DDR_NB_LOOPS (ddr));
446 }
447 }
448
449 fprintf (outf, ")\n");
450 }
451
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453
454 void
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
457 {
458 switch (dir)
459 {
460 case dir_positive:
461 fprintf (file, "+");
462 break;
463
464 case dir_negative:
465 fprintf (file, "-");
466 break;
467
468 case dir_equal:
469 fprintf (file, "=");
470 break;
471
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
474 break;
475
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
478 break;
479
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
482 break;
483
484 case dir_star:
485 fprintf (file, "*");
486 break;
487
488 default:
489 break;
490 }
491 }
492
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
496 considered nest. */
497
498 void
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 {
501 unsigned int i, j;
502 struct data_dependence_relation *ddr;
503 lambda_vector v;
504
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 {
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 {
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
513 }
514
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 {
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
520 }
521 }
522
523 fprintf (file, "\n\n");
524 }
525
526 /* Dumps the data dependence relations DDRS in FILE. */
527
528 void
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 {
531 unsigned int i;
532 struct data_dependence_relation *ddr;
533
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
536
537 fprintf (file, "\n\n");
538 }
539
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
544 is returned. */
545
546 static bool
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
549 {
550 tree var0, var1;
551 tree off0, off1;
552 enum tree_code ocode = code;
553
554 *var = NULL_TREE;
555 *off = NULL_TREE;
556
557 switch (code)
558 {
559 case INTEGER_CST:
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
562 return true;
563
564 case POINTER_PLUS_EXPR:
565 ocode = PLUS_EXPR;
566 /* FALLTHROUGH */
567 case PLUS_EXPR:
568 case MINUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
573 return true;
574
575 case MULT_EXPR:
576 if (TREE_CODE (op1) != INTEGER_CST)
577 return false;
578
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
582 return true;
583
584 case ADDR_EXPR:
585 {
586 tree base, poffset;
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
590
591 op0 = TREE_OPERAND (op0, 0);
592 if (!handled_component_p (op0))
593 return false;
594
595 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
596 &pmode, &punsignedp, &pvolatilep, false);
597
598 if (pbitpos % BITS_PER_UNIT != 0)
599 return false;
600 base = build_fold_addr_expr (base);
601 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
602
603 if (poffset)
604 {
605 split_constant_offset (poffset, &poffset, &off1);
606 off0 = size_binop (PLUS_EXPR, off0, off1);
607 if (POINTER_TYPE_P (TREE_TYPE (base)))
608 base = fold_build_pointer_plus (base, poffset);
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
612 }
613
614 var0 = fold_convert (type, base);
615
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
622 {
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
631 return false;
632
633 *var = var0;
634 *off = off0;
635 return true;
636 }
637
638 case SSA_NAME:
639 {
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
642
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
645
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
649
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
651 }
652 CASE_CONVERT:
653 {
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 {
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
667 }
668 return false;
669 }
670
671 default:
672 return false;
673 }
674 }
675
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
678
679 void
680 split_constant_offset (tree exp, tree *var, tree *off)
681 {
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
684
685 *var = exp;
686 *off = ssize_int (0);
687 STRIP_NOPS (exp);
688
689 if (tree_is_chrec (exp))
690 return;
691
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 {
697 *var = fold_convert (type, e);
698 *off = o;
699 }
700 }
701
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
704
705 static tree
706 canonicalize_base_object_address (tree addr)
707 {
708 tree orig = addr;
709
710 STRIP_NOPS (addr);
711
712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
716
717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
719
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
721 }
722
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
726
727 bool
728 dr_analyze_innermost (struct data_reference *dr)
729 {
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
734 tree base, poffset;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
740
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
743
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
747
748 if (pbitpos % BITS_PER_UNIT != 0)
749 {
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
752 return false;
753 }
754
755 if (TREE_CODE (base) == MEM_REF)
756 {
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 {
759 if (!poffset)
760 {
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
763 }
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 }
767 base = TREE_OPERAND (base, 0);
768 }
769 else
770 base = build_fold_addr_expr (base);
771 if (in_loop)
772 {
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
775 {
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
779 }
780 }
781 else
782 {
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
786 }
787
788 if (!poffset)
789 {
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
792 }
793 else
794 {
795 if (!in_loop)
796 {
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
799 }
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
802 {
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
807 }
808 }
809
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
815
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
819
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
825
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
830
831 return true;
832 }
833
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
836
837 static void
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 {
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
844
845 if (!nest)
846 {
847 DR_BASE_OBJECT (dr) = ref;
848 DR_ACCESS_FNS (dr) = NULL;
849 return;
850 }
851
852 before_loop = block_before_loop (nest);
853
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (aref))
856 {
857 /* For ARRAY_REFs the base is the reference with the index replaced
858 by zero. */
859 if (TREE_CODE (aref) == ARRAY_REF)
860 {
861 op = TREE_OPERAND (aref, 1);
862 access_fn = analyze_scalar_evolution (loop, op);
863 access_fn = instantiate_scev (before_loop, loop, access_fn);
864 VEC_safe_push (tree, heap, access_fns, access_fn);
865 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
866 }
867 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
868 into a two element array with a constant index. The base is
869 then just the immediate underlying object. */
870 else if (TREE_CODE (aref) == REALPART_EXPR)
871 {
872 ref = TREE_OPERAND (ref, 0);
873 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
874 }
875 else if (TREE_CODE (aref) == IMAGPART_EXPR)
876 {
877 ref = TREE_OPERAND (ref, 0);
878 VEC_safe_push (tree, heap, access_fns, integer_one_node);
879 }
880
881 aref = TREE_OPERAND (aref, 0);
882 }
883
884 /* If the address operand of a MEM_REF base has an evolution in the
885 analyzed nest, add it as an additional independent access-function. */
886 if (TREE_CODE (aref) == MEM_REF)
887 {
888 op = TREE_OPERAND (aref, 0);
889 access_fn = analyze_scalar_evolution (loop, op);
890 access_fn = instantiate_scev (before_loop, loop, access_fn);
891 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
892 {
893 base = initial_condition (access_fn);
894 split_constant_offset (base, &base, &off);
895 /* Fold the MEM_REF offset into the evolutions initial
896 value to make more bases comparable. */
897 if (!integer_zerop (TREE_OPERAND (aref, 1)))
898 {
899 off = size_binop (PLUS_EXPR, off,
900 fold_convert (ssizetype,
901 TREE_OPERAND (aref, 1)));
902 TREE_OPERAND (aref, 1)
903 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
904 }
905 access_fn = chrec_replace_initial_condition
906 (access_fn, fold_convert (TREE_TYPE (base), off));
907 TREE_OPERAND (aref, 0) = base;
908 VEC_safe_push (tree, heap, access_fns, access_fn);
909 }
910 }
911
912 if (TREE_CODE (ref) == MEM_REF
913 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
914 && integer_zerop (TREE_OPERAND (ref, 1)))
915 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
916
917 /* For canonicalization purposes we'd like to strip all outermost
918 zero-offset component-refs.
919 ??? For now simply handle zero-index array-refs. */
920 while (TREE_CODE (ref) == ARRAY_REF
921 && integer_zerop (TREE_OPERAND (ref, 1)))
922 ref = TREE_OPERAND (ref, 0);
923
924 DR_BASE_OBJECT (dr) = ref;
925 DR_ACCESS_FNS (dr) = access_fns;
926 }
927
928 /* Extracts the alias analysis information from the memory reference DR. */
929
930 static void
931 dr_analyze_alias (struct data_reference *dr)
932 {
933 tree ref = DR_REF (dr);
934 tree base = get_base_address (ref), addr;
935
936 if (INDIRECT_REF_P (base)
937 || TREE_CODE (base) == MEM_REF)
938 {
939 addr = TREE_OPERAND (base, 0);
940 if (TREE_CODE (addr) == SSA_NAME)
941 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
942 }
943 }
944
945 /* Frees data reference DR. */
946
947 void
948 free_data_ref (data_reference_p dr)
949 {
950 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
951 free (dr);
952 }
953
954 /* Analyzes memory reference MEMREF accessed in STMT. The reference
955 is read if IS_READ is true, write otherwise. Returns the
956 data_reference description of MEMREF. NEST is the outermost loop
957 in which the reference should be instantiated, LOOP is the loop in
958 which the data reference should be analyzed. */
959
960 struct data_reference *
961 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
962 bool is_read)
963 {
964 struct data_reference *dr;
965
966 if (dump_file && (dump_flags & TDF_DETAILS))
967 {
968 fprintf (dump_file, "Creating dr for ");
969 print_generic_expr (dump_file, memref, TDF_SLIM);
970 fprintf (dump_file, "\n");
971 }
972
973 dr = XCNEW (struct data_reference);
974 DR_STMT (dr) = stmt;
975 DR_REF (dr) = memref;
976 DR_IS_READ (dr) = is_read;
977
978 dr_analyze_innermost (dr);
979 dr_analyze_indices (dr, nest, loop);
980 dr_analyze_alias (dr);
981
982 if (dump_file && (dump_flags & TDF_DETAILS))
983 {
984 unsigned i;
985 fprintf (dump_file, "\tbase_address: ");
986 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
987 fprintf (dump_file, "\n\toffset from base address: ");
988 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
989 fprintf (dump_file, "\n\tconstant offset from base address: ");
990 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
991 fprintf (dump_file, "\n\tstep: ");
992 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
993 fprintf (dump_file, "\n\taligned to: ");
994 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
995 fprintf (dump_file, "\n\tbase_object: ");
996 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
997 fprintf (dump_file, "\n");
998 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
999 {
1000 fprintf (dump_file, "\tAccess function %d: ", i);
1001 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1002 }
1003 }
1004
1005 return dr;
1006 }
1007
1008 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1009 expressions. */
1010 static bool
1011 dr_equal_offsets_p1 (tree offset1, tree offset2)
1012 {
1013 bool res;
1014
1015 STRIP_NOPS (offset1);
1016 STRIP_NOPS (offset2);
1017
1018 if (offset1 == offset2)
1019 return true;
1020
1021 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1022 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1023 return false;
1024
1025 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1026 TREE_OPERAND (offset2, 0));
1027
1028 if (!res || !BINARY_CLASS_P (offset1))
1029 return res;
1030
1031 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1032 TREE_OPERAND (offset2, 1));
1033
1034 return res;
1035 }
1036
1037 /* Check if DRA and DRB have equal offsets. */
1038 bool
1039 dr_equal_offsets_p (struct data_reference *dra,
1040 struct data_reference *drb)
1041 {
1042 tree offset1, offset2;
1043
1044 offset1 = DR_OFFSET (dra);
1045 offset2 = DR_OFFSET (drb);
1046
1047 return dr_equal_offsets_p1 (offset1, offset2);
1048 }
1049
1050 /* Returns true if FNA == FNB. */
1051
1052 static bool
1053 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1054 {
1055 unsigned i, n = VEC_length (tree, fna);
1056
1057 if (n != VEC_length (tree, fnb))
1058 return false;
1059
1060 for (i = 0; i < n; i++)
1061 if (!operand_equal_p (VEC_index (tree, fna, i),
1062 VEC_index (tree, fnb, i), 0))
1063 return false;
1064
1065 return true;
1066 }
1067
1068 /* If all the functions in CF are the same, returns one of them,
1069 otherwise returns NULL. */
1070
1071 static affine_fn
1072 common_affine_function (conflict_function *cf)
1073 {
1074 unsigned i;
1075 affine_fn comm;
1076
1077 if (!CF_NONTRIVIAL_P (cf))
1078 return NULL;
1079
1080 comm = cf->fns[0];
1081
1082 for (i = 1; i < cf->n; i++)
1083 if (!affine_function_equal_p (comm, cf->fns[i]))
1084 return NULL;
1085
1086 return comm;
1087 }
1088
1089 /* Returns the base of the affine function FN. */
1090
1091 static tree
1092 affine_function_base (affine_fn fn)
1093 {
1094 return VEC_index (tree, fn, 0);
1095 }
1096
1097 /* Returns true if FN is a constant. */
1098
1099 static bool
1100 affine_function_constant_p (affine_fn fn)
1101 {
1102 unsigned i;
1103 tree coef;
1104
1105 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1106 if (!integer_zerop (coef))
1107 return false;
1108
1109 return true;
1110 }
1111
1112 /* Returns true if FN is the zero constant function. */
1113
1114 static bool
1115 affine_function_zero_p (affine_fn fn)
1116 {
1117 return (integer_zerop (affine_function_base (fn))
1118 && affine_function_constant_p (fn));
1119 }
1120
1121 /* Returns a signed integer type with the largest precision from TA
1122 and TB. */
1123
1124 static tree
1125 signed_type_for_types (tree ta, tree tb)
1126 {
1127 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1128 return signed_type_for (ta);
1129 else
1130 return signed_type_for (tb);
1131 }
1132
1133 /* Applies operation OP on affine functions FNA and FNB, and returns the
1134 result. */
1135
1136 static affine_fn
1137 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1138 {
1139 unsigned i, n, m;
1140 affine_fn ret;
1141 tree coef;
1142
1143 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1144 {
1145 n = VEC_length (tree, fna);
1146 m = VEC_length (tree, fnb);
1147 }
1148 else
1149 {
1150 n = VEC_length (tree, fnb);
1151 m = VEC_length (tree, fna);
1152 }
1153
1154 ret = VEC_alloc (tree, heap, m);
1155 for (i = 0; i < n; i++)
1156 {
1157 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1158 TREE_TYPE (VEC_index (tree, fnb, i)));
1159
1160 VEC_quick_push (tree, ret,
1161 fold_build2 (op, type,
1162 VEC_index (tree, fna, i),
1163 VEC_index (tree, fnb, i)));
1164 }
1165
1166 for (; VEC_iterate (tree, fna, i, coef); i++)
1167 VEC_quick_push (tree, ret,
1168 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1169 coef, integer_zero_node));
1170 for (; VEC_iterate (tree, fnb, i, coef); i++)
1171 VEC_quick_push (tree, ret,
1172 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 integer_zero_node, coef));
1174
1175 return ret;
1176 }
1177
1178 /* Returns the sum of affine functions FNA and FNB. */
1179
1180 static affine_fn
1181 affine_fn_plus (affine_fn fna, affine_fn fnb)
1182 {
1183 return affine_fn_op (PLUS_EXPR, fna, fnb);
1184 }
1185
1186 /* Returns the difference of affine functions FNA and FNB. */
1187
1188 static affine_fn
1189 affine_fn_minus (affine_fn fna, affine_fn fnb)
1190 {
1191 return affine_fn_op (MINUS_EXPR, fna, fnb);
1192 }
1193
1194 /* Frees affine function FN. */
1195
1196 static void
1197 affine_fn_free (affine_fn fn)
1198 {
1199 VEC_free (tree, heap, fn);
1200 }
1201
1202 /* Determine for each subscript in the data dependence relation DDR
1203 the distance. */
1204
1205 static void
1206 compute_subscript_distance (struct data_dependence_relation *ddr)
1207 {
1208 conflict_function *cf_a, *cf_b;
1209 affine_fn fn_a, fn_b, diff;
1210
1211 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1212 {
1213 unsigned int i;
1214
1215 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1216 {
1217 struct subscript *subscript;
1218
1219 subscript = DDR_SUBSCRIPT (ddr, i);
1220 cf_a = SUB_CONFLICTS_IN_A (subscript);
1221 cf_b = SUB_CONFLICTS_IN_B (subscript);
1222
1223 fn_a = common_affine_function (cf_a);
1224 fn_b = common_affine_function (cf_b);
1225 if (!fn_a || !fn_b)
1226 {
1227 SUB_DISTANCE (subscript) = chrec_dont_know;
1228 return;
1229 }
1230 diff = affine_fn_minus (fn_a, fn_b);
1231
1232 if (affine_function_constant_p (diff))
1233 SUB_DISTANCE (subscript) = affine_function_base (diff);
1234 else
1235 SUB_DISTANCE (subscript) = chrec_dont_know;
1236
1237 affine_fn_free (diff);
1238 }
1239 }
1240 }
1241
1242 /* Returns the conflict function for "unknown". */
1243
1244 static conflict_function *
1245 conflict_fn_not_known (void)
1246 {
1247 conflict_function *fn = XCNEW (conflict_function);
1248 fn->n = NOT_KNOWN;
1249
1250 return fn;
1251 }
1252
1253 /* Returns the conflict function for "independent". */
1254
1255 static conflict_function *
1256 conflict_fn_no_dependence (void)
1257 {
1258 conflict_function *fn = XCNEW (conflict_function);
1259 fn->n = NO_DEPENDENCE;
1260
1261 return fn;
1262 }
1263
1264 /* Returns true if the address of OBJ is invariant in LOOP. */
1265
1266 static bool
1267 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1268 {
1269 while (handled_component_p (obj))
1270 {
1271 if (TREE_CODE (obj) == ARRAY_REF)
1272 {
1273 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1274 need to check the stride and the lower bound of the reference. */
1275 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1276 loop->num)
1277 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1278 loop->num))
1279 return false;
1280 }
1281 else if (TREE_CODE (obj) == COMPONENT_REF)
1282 {
1283 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1284 loop->num))
1285 return false;
1286 }
1287 obj = TREE_OPERAND (obj, 0);
1288 }
1289
1290 if (!INDIRECT_REF_P (obj)
1291 && TREE_CODE (obj) != MEM_REF)
1292 return true;
1293
1294 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1295 loop->num);
1296 }
1297
1298 /* Returns false if we can prove that data references A and B do not alias,
1299 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1300 considered. */
1301
1302 bool
1303 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1304 bool loop_nest)
1305 {
1306 tree addr_a = DR_BASE_OBJECT (a);
1307 tree addr_b = DR_BASE_OBJECT (b);
1308
1309 /* If we are not processing a loop nest but scalar code we
1310 do not need to care about possible cross-iteration dependences
1311 and thus can process the full original reference. Do so,
1312 similar to how loop invariant motion applies extra offset-based
1313 disambiguation. */
1314 if (!loop_nest)
1315 {
1316 aff_tree off1, off2;
1317 double_int size1, size2;
1318 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1319 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1320 aff_combination_scale (&off1, double_int_minus_one);
1321 aff_combination_add (&off2, &off1);
1322 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1323 return false;
1324 }
1325
1326 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1327 return refs_output_dependent_p (addr_a, addr_b);
1328 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1329 return refs_anti_dependent_p (addr_a, addr_b);
1330 return refs_may_alias_p (addr_a, addr_b);
1331 }
1332
1333 static void compute_self_dependence (struct data_dependence_relation *);
1334
1335 /* Initialize a data dependence relation between data accesses A and
1336 B. NB_LOOPS is the number of loops surrounding the references: the
1337 size of the classic distance/direction vectors. */
1338
1339 static struct data_dependence_relation *
1340 initialize_data_dependence_relation (struct data_reference *a,
1341 struct data_reference *b,
1342 VEC (loop_p, heap) *loop_nest)
1343 {
1344 struct data_dependence_relation *res;
1345 unsigned int i;
1346
1347 res = XNEW (struct data_dependence_relation);
1348 DDR_A (res) = a;
1349 DDR_B (res) = b;
1350 DDR_LOOP_NEST (res) = NULL;
1351 DDR_REVERSED_P (res) = false;
1352 DDR_SUBSCRIPTS (res) = NULL;
1353 DDR_DIR_VECTS (res) = NULL;
1354 DDR_DIST_VECTS (res) = NULL;
1355
1356 if (a == NULL || b == NULL)
1357 {
1358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1359 return res;
1360 }
1361
1362 /* If the data references do not alias, then they are independent. */
1363 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1364 {
1365 DDR_ARE_DEPENDENT (res) = chrec_known;
1366 return res;
1367 }
1368
1369 /* When the references are exactly the same, don't spend time doing
1370 the data dependence tests, just initialize the ddr and return. */
1371 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1372 {
1373 DDR_AFFINE_P (res) = true;
1374 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1375 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1376 DDR_LOOP_NEST (res) = loop_nest;
1377 DDR_INNER_LOOP (res) = 0;
1378 DDR_SELF_REFERENCE (res) = true;
1379 compute_self_dependence (res);
1380 return res;
1381 }
1382
1383 /* If the references do not access the same object, we do not know
1384 whether they alias or not. */
1385 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1386 {
1387 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1388 return res;
1389 }
1390
1391 /* If the base of the object is not invariant in the loop nest, we cannot
1392 analyze it. TODO -- in fact, it would suffice to record that there may
1393 be arbitrary dependences in the loops where the base object varies. */
1394 if (loop_nest
1395 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1396 DR_BASE_OBJECT (a)))
1397 {
1398 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1399 return res;
1400 }
1401
1402 /* If the number of dimensions of the access to not agree we can have
1403 a pointer access to a component of the array element type and an
1404 array access while the base-objects are still the same. Punt. */
1405 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1406 {
1407 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1408 return res;
1409 }
1410
1411 DDR_AFFINE_P (res) = true;
1412 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1413 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1414 DDR_LOOP_NEST (res) = loop_nest;
1415 DDR_INNER_LOOP (res) = 0;
1416 DDR_SELF_REFERENCE (res) = false;
1417
1418 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1419 {
1420 struct subscript *subscript;
1421
1422 subscript = XNEW (struct subscript);
1423 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1424 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1425 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1426 SUB_DISTANCE (subscript) = chrec_dont_know;
1427 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1428 }
1429
1430 return res;
1431 }
1432
1433 /* Frees memory used by the conflict function F. */
1434
1435 static void
1436 free_conflict_function (conflict_function *f)
1437 {
1438 unsigned i;
1439
1440 if (CF_NONTRIVIAL_P (f))
1441 {
1442 for (i = 0; i < f->n; i++)
1443 affine_fn_free (f->fns[i]);
1444 }
1445 free (f);
1446 }
1447
1448 /* Frees memory used by SUBSCRIPTS. */
1449
1450 static void
1451 free_subscripts (VEC (subscript_p, heap) *subscripts)
1452 {
1453 unsigned i;
1454 subscript_p s;
1455
1456 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1457 {
1458 free_conflict_function (s->conflicting_iterations_in_a);
1459 free_conflict_function (s->conflicting_iterations_in_b);
1460 free (s);
1461 }
1462 VEC_free (subscript_p, heap, subscripts);
1463 }
1464
1465 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1466 description. */
1467
1468 static inline void
1469 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1470 tree chrec)
1471 {
1472 if (dump_file && (dump_flags & TDF_DETAILS))
1473 {
1474 fprintf (dump_file, "(dependence classified: ");
1475 print_generic_expr (dump_file, chrec, 0);
1476 fprintf (dump_file, ")\n");
1477 }
1478
1479 DDR_ARE_DEPENDENT (ddr) = chrec;
1480 free_subscripts (DDR_SUBSCRIPTS (ddr));
1481 DDR_SUBSCRIPTS (ddr) = NULL;
1482 }
1483
1484 /* The dependence relation DDR cannot be represented by a distance
1485 vector. */
1486
1487 static inline void
1488 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1489 {
1490 if (dump_file && (dump_flags & TDF_DETAILS))
1491 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1492
1493 DDR_AFFINE_P (ddr) = false;
1494 }
1495
1496 \f
1497
1498 /* This section contains the classic Banerjee tests. */
1499
1500 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1501 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1502
1503 static inline bool
1504 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1505 {
1506 return (evolution_function_is_constant_p (chrec_a)
1507 && evolution_function_is_constant_p (chrec_b));
1508 }
1509
1510 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1511 variable, i.e., if the SIV (Single Index Variable) test is true. */
1512
1513 static bool
1514 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1515 {
1516 if ((evolution_function_is_constant_p (chrec_a)
1517 && evolution_function_is_univariate_p (chrec_b))
1518 || (evolution_function_is_constant_p (chrec_b)
1519 && evolution_function_is_univariate_p (chrec_a)))
1520 return true;
1521
1522 if (evolution_function_is_univariate_p (chrec_a)
1523 && evolution_function_is_univariate_p (chrec_b))
1524 {
1525 switch (TREE_CODE (chrec_a))
1526 {
1527 case POLYNOMIAL_CHREC:
1528 switch (TREE_CODE (chrec_b))
1529 {
1530 case POLYNOMIAL_CHREC:
1531 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1532 return false;
1533
1534 default:
1535 return true;
1536 }
1537
1538 default:
1539 return true;
1540 }
1541 }
1542
1543 return false;
1544 }
1545
1546 /* Creates a conflict function with N dimensions. The affine functions
1547 in each dimension follow. */
1548
1549 static conflict_function *
1550 conflict_fn (unsigned n, ...)
1551 {
1552 unsigned i;
1553 conflict_function *ret = XCNEW (conflict_function);
1554 va_list ap;
1555
1556 gcc_assert (0 < n && n <= MAX_DIM);
1557 va_start(ap, n);
1558
1559 ret->n = n;
1560 for (i = 0; i < n; i++)
1561 ret->fns[i] = va_arg (ap, affine_fn);
1562 va_end(ap);
1563
1564 return ret;
1565 }
1566
1567 /* Returns constant affine function with value CST. */
1568
1569 static affine_fn
1570 affine_fn_cst (tree cst)
1571 {
1572 affine_fn fn = VEC_alloc (tree, heap, 1);
1573 VEC_quick_push (tree, fn, cst);
1574 return fn;
1575 }
1576
1577 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1578
1579 static affine_fn
1580 affine_fn_univar (tree cst, unsigned dim, tree coef)
1581 {
1582 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1583 unsigned i;
1584
1585 gcc_assert (dim > 0);
1586 VEC_quick_push (tree, fn, cst);
1587 for (i = 1; i < dim; i++)
1588 VEC_quick_push (tree, fn, integer_zero_node);
1589 VEC_quick_push (tree, fn, coef);
1590 return fn;
1591 }
1592
1593 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1594 *OVERLAPS_B are initialized to the functions that describe the
1595 relation between the elements accessed twice by CHREC_A and
1596 CHREC_B. For k >= 0, the following property is verified:
1597
1598 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1599
1600 static void
1601 analyze_ziv_subscript (tree chrec_a,
1602 tree chrec_b,
1603 conflict_function **overlaps_a,
1604 conflict_function **overlaps_b,
1605 tree *last_conflicts)
1606 {
1607 tree type, difference;
1608 dependence_stats.num_ziv++;
1609
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, "(analyze_ziv_subscript \n");
1612
1613 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1614 chrec_a = chrec_convert (type, chrec_a, NULL);
1615 chrec_b = chrec_convert (type, chrec_b, NULL);
1616 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1617
1618 switch (TREE_CODE (difference))
1619 {
1620 case INTEGER_CST:
1621 if (integer_zerop (difference))
1622 {
1623 /* The difference is equal to zero: the accessed index
1624 overlaps for each iteration in the loop. */
1625 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1626 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1627 *last_conflicts = chrec_dont_know;
1628 dependence_stats.num_ziv_dependent++;
1629 }
1630 else
1631 {
1632 /* The accesses do not overlap. */
1633 *overlaps_a = conflict_fn_no_dependence ();
1634 *overlaps_b = conflict_fn_no_dependence ();
1635 *last_conflicts = integer_zero_node;
1636 dependence_stats.num_ziv_independent++;
1637 }
1638 break;
1639
1640 default:
1641 /* We're not sure whether the indexes overlap. For the moment,
1642 conservatively answer "don't know". */
1643 if (dump_file && (dump_flags & TDF_DETAILS))
1644 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1645
1646 *overlaps_a = conflict_fn_not_known ();
1647 *overlaps_b = conflict_fn_not_known ();
1648 *last_conflicts = chrec_dont_know;
1649 dependence_stats.num_ziv_unimplemented++;
1650 break;
1651 }
1652
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1654 fprintf (dump_file, ")\n");
1655 }
1656
1657 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1658 and only if it fits to the int type. If this is not the case, or the
1659 bound on the number of iterations of LOOP could not be derived, returns
1660 chrec_dont_know. */
1661
1662 static tree
1663 max_stmt_executions_tree (struct loop *loop)
1664 {
1665 double_int nit;
1666
1667 if (!max_stmt_executions (loop, true, &nit))
1668 return chrec_dont_know;
1669
1670 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1671 return chrec_dont_know;
1672
1673 return double_int_to_tree (unsigned_type_node, nit);
1674 }
1675
1676 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1677 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1678 *OVERLAPS_B are initialized to the functions that describe the
1679 relation between the elements accessed twice by CHREC_A and
1680 CHREC_B. For k >= 0, the following property is verified:
1681
1682 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1683
1684 static void
1685 analyze_siv_subscript_cst_affine (tree chrec_a,
1686 tree chrec_b,
1687 conflict_function **overlaps_a,
1688 conflict_function **overlaps_b,
1689 tree *last_conflicts)
1690 {
1691 bool value0, value1, value2;
1692 tree type, difference, tmp;
1693
1694 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1695 chrec_a = chrec_convert (type, chrec_a, NULL);
1696 chrec_b = chrec_convert (type, chrec_b, NULL);
1697 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1698
1699 if (!chrec_is_positive (initial_condition (difference), &value0))
1700 {
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1703
1704 dependence_stats.num_siv_unimplemented++;
1705 *overlaps_a = conflict_fn_not_known ();
1706 *overlaps_b = conflict_fn_not_known ();
1707 *last_conflicts = chrec_dont_know;
1708 return;
1709 }
1710 else
1711 {
1712 if (value0 == false)
1713 {
1714 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1715 {
1716 if (dump_file && (dump_flags & TDF_DETAILS))
1717 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1718
1719 *overlaps_a = conflict_fn_not_known ();
1720 *overlaps_b = conflict_fn_not_known ();
1721 *last_conflicts = chrec_dont_know;
1722 dependence_stats.num_siv_unimplemented++;
1723 return;
1724 }
1725 else
1726 {
1727 if (value1 == true)
1728 {
1729 /* Example:
1730 chrec_a = 12
1731 chrec_b = {10, +, 1}
1732 */
1733
1734 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1735 {
1736 HOST_WIDE_INT numiter;
1737 struct loop *loop = get_chrec_loop (chrec_b);
1738
1739 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1740 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1741 fold_build1 (ABS_EXPR, type, difference),
1742 CHREC_RIGHT (chrec_b));
1743 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1744 *last_conflicts = integer_one_node;
1745
1746
1747 /* Perform weak-zero siv test to see if overlap is
1748 outside the loop bounds. */
1749 numiter = max_stmt_executions_int (loop, true);
1750
1751 if (numiter >= 0
1752 && compare_tree_int (tmp, numiter) > 0)
1753 {
1754 free_conflict_function (*overlaps_a);
1755 free_conflict_function (*overlaps_b);
1756 *overlaps_a = conflict_fn_no_dependence ();
1757 *overlaps_b = conflict_fn_no_dependence ();
1758 *last_conflicts = integer_zero_node;
1759 dependence_stats.num_siv_independent++;
1760 return;
1761 }
1762 dependence_stats.num_siv_dependent++;
1763 return;
1764 }
1765
1766 /* When the step does not divide the difference, there are
1767 no overlaps. */
1768 else
1769 {
1770 *overlaps_a = conflict_fn_no_dependence ();
1771 *overlaps_b = conflict_fn_no_dependence ();
1772 *last_conflicts = integer_zero_node;
1773 dependence_stats.num_siv_independent++;
1774 return;
1775 }
1776 }
1777
1778 else
1779 {
1780 /* Example:
1781 chrec_a = 12
1782 chrec_b = {10, +, -1}
1783
1784 In this case, chrec_a will not overlap with chrec_b. */
1785 *overlaps_a = conflict_fn_no_dependence ();
1786 *overlaps_b = conflict_fn_no_dependence ();
1787 *last_conflicts = integer_zero_node;
1788 dependence_stats.num_siv_independent++;
1789 return;
1790 }
1791 }
1792 }
1793 else
1794 {
1795 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1796 {
1797 if (dump_file && (dump_flags & TDF_DETAILS))
1798 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1799
1800 *overlaps_a = conflict_fn_not_known ();
1801 *overlaps_b = conflict_fn_not_known ();
1802 *last_conflicts = chrec_dont_know;
1803 dependence_stats.num_siv_unimplemented++;
1804 return;
1805 }
1806 else
1807 {
1808 if (value2 == false)
1809 {
1810 /* Example:
1811 chrec_a = 3
1812 chrec_b = {10, +, -1}
1813 */
1814 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1815 {
1816 HOST_WIDE_INT numiter;
1817 struct loop *loop = get_chrec_loop (chrec_b);
1818
1819 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1820 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1821 CHREC_RIGHT (chrec_b));
1822 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1823 *last_conflicts = integer_one_node;
1824
1825 /* Perform weak-zero siv test to see if overlap is
1826 outside the loop bounds. */
1827 numiter = max_stmt_executions_int (loop, true);
1828
1829 if (numiter >= 0
1830 && compare_tree_int (tmp, numiter) > 0)
1831 {
1832 free_conflict_function (*overlaps_a);
1833 free_conflict_function (*overlaps_b);
1834 *overlaps_a = conflict_fn_no_dependence ();
1835 *overlaps_b = conflict_fn_no_dependence ();
1836 *last_conflicts = integer_zero_node;
1837 dependence_stats.num_siv_independent++;
1838 return;
1839 }
1840 dependence_stats.num_siv_dependent++;
1841 return;
1842 }
1843
1844 /* When the step does not divide the difference, there
1845 are no overlaps. */
1846 else
1847 {
1848 *overlaps_a = conflict_fn_no_dependence ();
1849 *overlaps_b = conflict_fn_no_dependence ();
1850 *last_conflicts = integer_zero_node;
1851 dependence_stats.num_siv_independent++;
1852 return;
1853 }
1854 }
1855 else
1856 {
1857 /* Example:
1858 chrec_a = 3
1859 chrec_b = {4, +, 1}
1860
1861 In this case, chrec_a will not overlap with chrec_b. */
1862 *overlaps_a = conflict_fn_no_dependence ();
1863 *overlaps_b = conflict_fn_no_dependence ();
1864 *last_conflicts = integer_zero_node;
1865 dependence_stats.num_siv_independent++;
1866 return;
1867 }
1868 }
1869 }
1870 }
1871 }
1872
1873 /* Helper recursive function for initializing the matrix A. Returns
1874 the initial value of CHREC. */
1875
1876 static tree
1877 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1878 {
1879 gcc_assert (chrec);
1880
1881 switch (TREE_CODE (chrec))
1882 {
1883 case POLYNOMIAL_CHREC:
1884 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1885
1886 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1887 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1888
1889 case PLUS_EXPR:
1890 case MULT_EXPR:
1891 case MINUS_EXPR:
1892 {
1893 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1894 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1895
1896 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1897 }
1898
1899 case NOP_EXPR:
1900 {
1901 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1902 return chrec_convert (chrec_type (chrec), op, NULL);
1903 }
1904
1905 case BIT_NOT_EXPR:
1906 {
1907 /* Handle ~X as -1 - X. */
1908 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1909 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1910 build_int_cst (TREE_TYPE (chrec), -1), op);
1911 }
1912
1913 case INTEGER_CST:
1914 return chrec;
1915
1916 default:
1917 gcc_unreachable ();
1918 return NULL_TREE;
1919 }
1920 }
1921
1922 #define FLOOR_DIV(x,y) ((x) / (y))
1923
1924 /* Solves the special case of the Diophantine equation:
1925 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1926
1927 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1928 number of iterations that loops X and Y run. The overlaps will be
1929 constructed as evolutions in dimension DIM. */
1930
1931 static void
1932 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1933 affine_fn *overlaps_a,
1934 affine_fn *overlaps_b,
1935 tree *last_conflicts, int dim)
1936 {
1937 if (((step_a > 0 && step_b > 0)
1938 || (step_a < 0 && step_b < 0)))
1939 {
1940 int step_overlaps_a, step_overlaps_b;
1941 int gcd_steps_a_b, last_conflict, tau2;
1942
1943 gcd_steps_a_b = gcd (step_a, step_b);
1944 step_overlaps_a = step_b / gcd_steps_a_b;
1945 step_overlaps_b = step_a / gcd_steps_a_b;
1946
1947 if (niter > 0)
1948 {
1949 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1950 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1951 last_conflict = tau2;
1952 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1953 }
1954 else
1955 *last_conflicts = chrec_dont_know;
1956
1957 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1958 build_int_cst (NULL_TREE,
1959 step_overlaps_a));
1960 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1961 build_int_cst (NULL_TREE,
1962 step_overlaps_b));
1963 }
1964
1965 else
1966 {
1967 *overlaps_a = affine_fn_cst (integer_zero_node);
1968 *overlaps_b = affine_fn_cst (integer_zero_node);
1969 *last_conflicts = integer_zero_node;
1970 }
1971 }
1972
1973 /* Solves the special case of a Diophantine equation where CHREC_A is
1974 an affine bivariate function, and CHREC_B is an affine univariate
1975 function. For example,
1976
1977 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1978
1979 has the following overlapping functions:
1980
1981 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1982 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1983 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1984
1985 FORNOW: This is a specialized implementation for a case occurring in
1986 a common benchmark. Implement the general algorithm. */
1987
1988 static void
1989 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1990 conflict_function **overlaps_a,
1991 conflict_function **overlaps_b,
1992 tree *last_conflicts)
1993 {
1994 bool xz_p, yz_p, xyz_p;
1995 int step_x, step_y, step_z;
1996 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1997 affine_fn overlaps_a_xz, overlaps_b_xz;
1998 affine_fn overlaps_a_yz, overlaps_b_yz;
1999 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2000 affine_fn ova1, ova2, ovb;
2001 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2002
2003 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2004 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2005 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2006
2007 niter_x =
2008 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2009 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2010 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2011
2012 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2013 {
2014 if (dump_file && (dump_flags & TDF_DETAILS))
2015 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2016
2017 *overlaps_a = conflict_fn_not_known ();
2018 *overlaps_b = conflict_fn_not_known ();
2019 *last_conflicts = chrec_dont_know;
2020 return;
2021 }
2022
2023 niter = MIN (niter_x, niter_z);
2024 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2025 &overlaps_a_xz,
2026 &overlaps_b_xz,
2027 &last_conflicts_xz, 1);
2028 niter = MIN (niter_y, niter_z);
2029 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2030 &overlaps_a_yz,
2031 &overlaps_b_yz,
2032 &last_conflicts_yz, 2);
2033 niter = MIN (niter_x, niter_z);
2034 niter = MIN (niter_y, niter);
2035 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2036 &overlaps_a_xyz,
2037 &overlaps_b_xyz,
2038 &last_conflicts_xyz, 3);
2039
2040 xz_p = !integer_zerop (last_conflicts_xz);
2041 yz_p = !integer_zerop (last_conflicts_yz);
2042 xyz_p = !integer_zerop (last_conflicts_xyz);
2043
2044 if (xz_p || yz_p || xyz_p)
2045 {
2046 ova1 = affine_fn_cst (integer_zero_node);
2047 ova2 = affine_fn_cst (integer_zero_node);
2048 ovb = affine_fn_cst (integer_zero_node);
2049 if (xz_p)
2050 {
2051 affine_fn t0 = ova1;
2052 affine_fn t2 = ovb;
2053
2054 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2055 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2056 affine_fn_free (t0);
2057 affine_fn_free (t2);
2058 *last_conflicts = last_conflicts_xz;
2059 }
2060 if (yz_p)
2061 {
2062 affine_fn t0 = ova2;
2063 affine_fn t2 = ovb;
2064
2065 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2066 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2067 affine_fn_free (t0);
2068 affine_fn_free (t2);
2069 *last_conflicts = last_conflicts_yz;
2070 }
2071 if (xyz_p)
2072 {
2073 affine_fn t0 = ova1;
2074 affine_fn t2 = ova2;
2075 affine_fn t4 = ovb;
2076
2077 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2078 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2079 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2080 affine_fn_free (t0);
2081 affine_fn_free (t2);
2082 affine_fn_free (t4);
2083 *last_conflicts = last_conflicts_xyz;
2084 }
2085 *overlaps_a = conflict_fn (2, ova1, ova2);
2086 *overlaps_b = conflict_fn (1, ovb);
2087 }
2088 else
2089 {
2090 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2091 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2092 *last_conflicts = integer_zero_node;
2093 }
2094
2095 affine_fn_free (overlaps_a_xz);
2096 affine_fn_free (overlaps_b_xz);
2097 affine_fn_free (overlaps_a_yz);
2098 affine_fn_free (overlaps_b_yz);
2099 affine_fn_free (overlaps_a_xyz);
2100 affine_fn_free (overlaps_b_xyz);
2101 }
2102
2103 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2104
2105 static void
2106 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2107 int size)
2108 {
2109 memcpy (vec2, vec1, size * sizeof (*vec1));
2110 }
2111
2112 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2113
2114 static void
2115 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2116 int m, int n)
2117 {
2118 int i;
2119
2120 for (i = 0; i < m; i++)
2121 lambda_vector_copy (mat1[i], mat2[i], n);
2122 }
2123
2124 /* Store the N x N identity matrix in MAT. */
2125
2126 static void
2127 lambda_matrix_id (lambda_matrix mat, int size)
2128 {
2129 int i, j;
2130
2131 for (i = 0; i < size; i++)
2132 for (j = 0; j < size; j++)
2133 mat[i][j] = (i == j) ? 1 : 0;
2134 }
2135
2136 /* Return the first nonzero element of vector VEC1 between START and N.
2137 We must have START <= N. Returns N if VEC1 is the zero vector. */
2138
2139 static int
2140 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2141 {
2142 int j = start;
2143 while (j < n && vec1[j] == 0)
2144 j++;
2145 return j;
2146 }
2147
2148 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2149 R2 = R2 + CONST1 * R1. */
2150
2151 static void
2152 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2153 {
2154 int i;
2155
2156 if (const1 == 0)
2157 return;
2158
2159 for (i = 0; i < n; i++)
2160 mat[r2][i] += const1 * mat[r1][i];
2161 }
2162
2163 /* Swap rows R1 and R2 in matrix MAT. */
2164
2165 static void
2166 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2167 {
2168 lambda_vector row;
2169
2170 row = mat[r1];
2171 mat[r1] = mat[r2];
2172 mat[r2] = row;
2173 }
2174
2175 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2176 and store the result in VEC2. */
2177
2178 static void
2179 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2180 int size, int const1)
2181 {
2182 int i;
2183
2184 if (const1 == 0)
2185 lambda_vector_clear (vec2, size);
2186 else
2187 for (i = 0; i < size; i++)
2188 vec2[i] = const1 * vec1[i];
2189 }
2190
2191 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2192
2193 static void
2194 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2195 int size)
2196 {
2197 lambda_vector_mult_const (vec1, vec2, size, -1);
2198 }
2199
2200 /* Negate row R1 of matrix MAT which has N columns. */
2201
2202 static void
2203 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2204 {
2205 lambda_vector_negate (mat[r1], mat[r1], n);
2206 }
2207
2208 /* Return true if two vectors are equal. */
2209
2210 static bool
2211 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2212 {
2213 int i;
2214 for (i = 0; i < size; i++)
2215 if (vec1[i] != vec2[i])
2216 return false;
2217 return true;
2218 }
2219
2220 /* Given an M x N integer matrix A, this function determines an M x
2221 M unimodular matrix U, and an M x N echelon matrix S such that
2222 "U.A = S". This decomposition is also known as "right Hermite".
2223
2224 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2225 Restructuring Compilers" Utpal Banerjee. */
2226
2227 static void
2228 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2229 lambda_matrix S, lambda_matrix U)
2230 {
2231 int i, j, i0 = 0;
2232
2233 lambda_matrix_copy (A, S, m, n);
2234 lambda_matrix_id (U, m);
2235
2236 for (j = 0; j < n; j++)
2237 {
2238 if (lambda_vector_first_nz (S[j], m, i0) < m)
2239 {
2240 ++i0;
2241 for (i = m - 1; i >= i0; i--)
2242 {
2243 while (S[i][j] != 0)
2244 {
2245 int sigma, factor, a, b;
2246
2247 a = S[i-1][j];
2248 b = S[i][j];
2249 sigma = (a * b < 0) ? -1: 1;
2250 a = abs (a);
2251 b = abs (b);
2252 factor = sigma * (a / b);
2253
2254 lambda_matrix_row_add (S, n, i, i-1, -factor);
2255 lambda_matrix_row_exchange (S, i, i-1);
2256
2257 lambda_matrix_row_add (U, m, i, i-1, -factor);
2258 lambda_matrix_row_exchange (U, i, i-1);
2259 }
2260 }
2261 }
2262 }
2263 }
2264
2265 /* Determines the overlapping elements due to accesses CHREC_A and
2266 CHREC_B, that are affine functions. This function cannot handle
2267 symbolic evolution functions, ie. when initial conditions are
2268 parameters, because it uses lambda matrices of integers. */
2269
2270 static void
2271 analyze_subscript_affine_affine (tree chrec_a,
2272 tree chrec_b,
2273 conflict_function **overlaps_a,
2274 conflict_function **overlaps_b,
2275 tree *last_conflicts)
2276 {
2277 unsigned nb_vars_a, nb_vars_b, dim;
2278 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2279 lambda_matrix A, U, S;
2280 struct obstack scratch_obstack;
2281
2282 if (eq_evolutions_p (chrec_a, chrec_b))
2283 {
2284 /* The accessed index overlaps for each iteration in the
2285 loop. */
2286 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2287 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2288 *last_conflicts = chrec_dont_know;
2289 return;
2290 }
2291 if (dump_file && (dump_flags & TDF_DETAILS))
2292 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2293
2294 /* For determining the initial intersection, we have to solve a
2295 Diophantine equation. This is the most time consuming part.
2296
2297 For answering to the question: "Is there a dependence?" we have
2298 to prove that there exists a solution to the Diophantine
2299 equation, and that the solution is in the iteration domain,
2300 i.e. the solution is positive or zero, and that the solution
2301 happens before the upper bound loop.nb_iterations. Otherwise
2302 there is no dependence. This function outputs a description of
2303 the iterations that hold the intersections. */
2304
2305 nb_vars_a = nb_vars_in_chrec (chrec_a);
2306 nb_vars_b = nb_vars_in_chrec (chrec_b);
2307
2308 gcc_obstack_init (&scratch_obstack);
2309
2310 dim = nb_vars_a + nb_vars_b;
2311 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2312 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2313 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2314
2315 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2316 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2317 gamma = init_b - init_a;
2318
2319 /* Don't do all the hard work of solving the Diophantine equation
2320 when we already know the solution: for example,
2321 | {3, +, 1}_1
2322 | {3, +, 4}_2
2323 | gamma = 3 - 3 = 0.
2324 Then the first overlap occurs during the first iterations:
2325 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2326 */
2327 if (gamma == 0)
2328 {
2329 if (nb_vars_a == 1 && nb_vars_b == 1)
2330 {
2331 HOST_WIDE_INT step_a, step_b;
2332 HOST_WIDE_INT niter, niter_a, niter_b;
2333 affine_fn ova, ovb;
2334
2335 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2336 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2337 niter = MIN (niter_a, niter_b);
2338 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2339 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2340
2341 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2342 &ova, &ovb,
2343 last_conflicts, 1);
2344 *overlaps_a = conflict_fn (1, ova);
2345 *overlaps_b = conflict_fn (1, ovb);
2346 }
2347
2348 else if (nb_vars_a == 2 && nb_vars_b == 1)
2349 compute_overlap_steps_for_affine_1_2
2350 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2351
2352 else if (nb_vars_a == 1 && nb_vars_b == 2)
2353 compute_overlap_steps_for_affine_1_2
2354 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2355
2356 else
2357 {
2358 if (dump_file && (dump_flags & TDF_DETAILS))
2359 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
2362 *last_conflicts = chrec_dont_know;
2363 }
2364 goto end_analyze_subs_aa;
2365 }
2366
2367 /* U.A = S */
2368 lambda_matrix_right_hermite (A, dim, 1, S, U);
2369
2370 if (S[0][0] < 0)
2371 {
2372 S[0][0] *= -1;
2373 lambda_matrix_row_negate (U, dim, 0);
2374 }
2375 gcd_alpha_beta = S[0][0];
2376
2377 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2378 but that is a quite strange case. Instead of ICEing, answer
2379 don't know. */
2380 if (gcd_alpha_beta == 0)
2381 {
2382 *overlaps_a = conflict_fn_not_known ();
2383 *overlaps_b = conflict_fn_not_known ();
2384 *last_conflicts = chrec_dont_know;
2385 goto end_analyze_subs_aa;
2386 }
2387
2388 /* The classic "gcd-test". */
2389 if (!int_divides_p (gcd_alpha_beta, gamma))
2390 {
2391 /* The "gcd-test" has determined that there is no integer
2392 solution, i.e. there is no dependence. */
2393 *overlaps_a = conflict_fn_no_dependence ();
2394 *overlaps_b = conflict_fn_no_dependence ();
2395 *last_conflicts = integer_zero_node;
2396 }
2397
2398 /* Both access functions are univariate. This includes SIV and MIV cases. */
2399 else if (nb_vars_a == 1 && nb_vars_b == 1)
2400 {
2401 /* Both functions should have the same evolution sign. */
2402 if (((A[0][0] > 0 && -A[1][0] > 0)
2403 || (A[0][0] < 0 && -A[1][0] < 0)))
2404 {
2405 /* The solutions are given by:
2406 |
2407 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2408 | [u21 u22] [y0]
2409
2410 For a given integer t. Using the following variables,
2411
2412 | i0 = u11 * gamma / gcd_alpha_beta
2413 | j0 = u12 * gamma / gcd_alpha_beta
2414 | i1 = u21
2415 | j1 = u22
2416
2417 the solutions are:
2418
2419 | x0 = i0 + i1 * t,
2420 | y0 = j0 + j1 * t. */
2421 HOST_WIDE_INT i0, j0, i1, j1;
2422
2423 i0 = U[0][0] * gamma / gcd_alpha_beta;
2424 j0 = U[0][1] * gamma / gcd_alpha_beta;
2425 i1 = U[1][0];
2426 j1 = U[1][1];
2427
2428 if ((i1 == 0 && i0 < 0)
2429 || (j1 == 0 && j0 < 0))
2430 {
2431 /* There is no solution.
2432 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2433 falls in here, but for the moment we don't look at the
2434 upper bound of the iteration domain. */
2435 *overlaps_a = conflict_fn_no_dependence ();
2436 *overlaps_b = conflict_fn_no_dependence ();
2437 *last_conflicts = integer_zero_node;
2438 goto end_analyze_subs_aa;
2439 }
2440
2441 if (i1 > 0 && j1 > 0)
2442 {
2443 HOST_WIDE_INT niter_a = max_stmt_executions_int
2444 (get_chrec_loop (chrec_a), true);
2445 HOST_WIDE_INT niter_b = max_stmt_executions_int
2446 (get_chrec_loop (chrec_b), true);
2447 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2448
2449 /* (X0, Y0) is a solution of the Diophantine equation:
2450 "chrec_a (X0) = chrec_b (Y0)". */
2451 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2452 CEIL (-j0, j1));
2453 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2454 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2455
2456 /* (X1, Y1) is the smallest positive solution of the eq
2457 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2458 first conflict occurs. */
2459 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2460 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2461 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2462
2463 if (niter > 0)
2464 {
2465 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2466 FLOOR_DIV (niter - j0, j1));
2467 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2468
2469 /* If the overlap occurs outside of the bounds of the
2470 loop, there is no dependence. */
2471 if (x1 >= niter || y1 >= niter)
2472 {
2473 *overlaps_a = conflict_fn_no_dependence ();
2474 *overlaps_b = conflict_fn_no_dependence ();
2475 *last_conflicts = integer_zero_node;
2476 goto end_analyze_subs_aa;
2477 }
2478 else
2479 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2480 }
2481 else
2482 *last_conflicts = chrec_dont_know;
2483
2484 *overlaps_a
2485 = conflict_fn (1,
2486 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2487 1,
2488 build_int_cst (NULL_TREE, i1)));
2489 *overlaps_b
2490 = conflict_fn (1,
2491 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2492 1,
2493 build_int_cst (NULL_TREE, j1)));
2494 }
2495 else
2496 {
2497 /* FIXME: For the moment, the upper bound of the
2498 iteration domain for i and j is not checked. */
2499 if (dump_file && (dump_flags & TDF_DETAILS))
2500 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2501 *overlaps_a = conflict_fn_not_known ();
2502 *overlaps_b = conflict_fn_not_known ();
2503 *last_conflicts = chrec_dont_know;
2504 }
2505 }
2506 else
2507 {
2508 if (dump_file && (dump_flags & TDF_DETAILS))
2509 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2510 *overlaps_a = conflict_fn_not_known ();
2511 *overlaps_b = conflict_fn_not_known ();
2512 *last_conflicts = chrec_dont_know;
2513 }
2514 }
2515 else
2516 {
2517 if (dump_file && (dump_flags & TDF_DETAILS))
2518 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2519 *overlaps_a = conflict_fn_not_known ();
2520 *overlaps_b = conflict_fn_not_known ();
2521 *last_conflicts = chrec_dont_know;
2522 }
2523
2524 end_analyze_subs_aa:
2525 obstack_free (&scratch_obstack, NULL);
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 {
2528 fprintf (dump_file, " (overlaps_a = ");
2529 dump_conflict_function (dump_file, *overlaps_a);
2530 fprintf (dump_file, ")\n (overlaps_b = ");
2531 dump_conflict_function (dump_file, *overlaps_b);
2532 fprintf (dump_file, ")\n");
2533 fprintf (dump_file, ")\n");
2534 }
2535 }
2536
2537 /* Returns true when analyze_subscript_affine_affine can be used for
2538 determining the dependence relation between chrec_a and chrec_b,
2539 that contain symbols. This function modifies chrec_a and chrec_b
2540 such that the analysis result is the same, and such that they don't
2541 contain symbols, and then can safely be passed to the analyzer.
2542
2543 Example: The analysis of the following tuples of evolutions produce
2544 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2545 vs. {0, +, 1}_1
2546
2547 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2548 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2549 */
2550
2551 static bool
2552 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2553 {
2554 tree diff, type, left_a, left_b, right_b;
2555
2556 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2557 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2558 /* FIXME: For the moment not handled. Might be refined later. */
2559 return false;
2560
2561 type = chrec_type (*chrec_a);
2562 left_a = CHREC_LEFT (*chrec_a);
2563 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2564 diff = chrec_fold_minus (type, left_a, left_b);
2565
2566 if (!evolution_function_is_constant_p (diff))
2567 return false;
2568
2569 if (dump_file && (dump_flags & TDF_DETAILS))
2570 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2571
2572 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2573 diff, CHREC_RIGHT (*chrec_a));
2574 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2575 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2576 build_int_cst (type, 0),
2577 right_b);
2578 return true;
2579 }
2580
2581 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2582 *OVERLAPS_B are initialized to the functions that describe the
2583 relation between the elements accessed twice by CHREC_A and
2584 CHREC_B. For k >= 0, the following property is verified:
2585
2586 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2587
2588 static void
2589 analyze_siv_subscript (tree chrec_a,
2590 tree chrec_b,
2591 conflict_function **overlaps_a,
2592 conflict_function **overlaps_b,
2593 tree *last_conflicts,
2594 int loop_nest_num)
2595 {
2596 dependence_stats.num_siv++;
2597
2598 if (dump_file && (dump_flags & TDF_DETAILS))
2599 fprintf (dump_file, "(analyze_siv_subscript \n");
2600
2601 if (evolution_function_is_constant_p (chrec_a)
2602 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2603 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2604 overlaps_a, overlaps_b, last_conflicts);
2605
2606 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2607 && evolution_function_is_constant_p (chrec_b))
2608 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2609 overlaps_b, overlaps_a, last_conflicts);
2610
2611 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2612 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2613 {
2614 if (!chrec_contains_symbols (chrec_a)
2615 && !chrec_contains_symbols (chrec_b))
2616 {
2617 analyze_subscript_affine_affine (chrec_a, chrec_b,
2618 overlaps_a, overlaps_b,
2619 last_conflicts);
2620
2621 if (CF_NOT_KNOWN_P (*overlaps_a)
2622 || CF_NOT_KNOWN_P (*overlaps_b))
2623 dependence_stats.num_siv_unimplemented++;
2624 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2625 || CF_NO_DEPENDENCE_P (*overlaps_b))
2626 dependence_stats.num_siv_independent++;
2627 else
2628 dependence_stats.num_siv_dependent++;
2629 }
2630 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2631 &chrec_b))
2632 {
2633 analyze_subscript_affine_affine (chrec_a, chrec_b,
2634 overlaps_a, overlaps_b,
2635 last_conflicts);
2636
2637 if (CF_NOT_KNOWN_P (*overlaps_a)
2638 || CF_NOT_KNOWN_P (*overlaps_b))
2639 dependence_stats.num_siv_unimplemented++;
2640 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2641 || CF_NO_DEPENDENCE_P (*overlaps_b))
2642 dependence_stats.num_siv_independent++;
2643 else
2644 dependence_stats.num_siv_dependent++;
2645 }
2646 else
2647 goto siv_subscript_dontknow;
2648 }
2649
2650 else
2651 {
2652 siv_subscript_dontknow:;
2653 if (dump_file && (dump_flags & TDF_DETAILS))
2654 fprintf (dump_file, "siv test failed: unimplemented.\n");
2655 *overlaps_a = conflict_fn_not_known ();
2656 *overlaps_b = conflict_fn_not_known ();
2657 *last_conflicts = chrec_dont_know;
2658 dependence_stats.num_siv_unimplemented++;
2659 }
2660
2661 if (dump_file && (dump_flags & TDF_DETAILS))
2662 fprintf (dump_file, ")\n");
2663 }
2664
2665 /* Returns false if we can prove that the greatest common divisor of the steps
2666 of CHREC does not divide CST, false otherwise. */
2667
2668 static bool
2669 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2670 {
2671 HOST_WIDE_INT cd = 0, val;
2672 tree step;
2673
2674 if (!host_integerp (cst, 0))
2675 return true;
2676 val = tree_low_cst (cst, 0);
2677
2678 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2679 {
2680 step = CHREC_RIGHT (chrec);
2681 if (!host_integerp (step, 0))
2682 return true;
2683 cd = gcd (cd, tree_low_cst (step, 0));
2684 chrec = CHREC_LEFT (chrec);
2685 }
2686
2687 return val % cd == 0;
2688 }
2689
2690 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2691 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2692 functions that describe the relation between the elements accessed
2693 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2694 is verified:
2695
2696 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2697
2698 static void
2699 analyze_miv_subscript (tree chrec_a,
2700 tree chrec_b,
2701 conflict_function **overlaps_a,
2702 conflict_function **overlaps_b,
2703 tree *last_conflicts,
2704 struct loop *loop_nest)
2705 {
2706 tree type, difference;
2707
2708 dependence_stats.num_miv++;
2709 if (dump_file && (dump_flags & TDF_DETAILS))
2710 fprintf (dump_file, "(analyze_miv_subscript \n");
2711
2712 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2713 chrec_a = chrec_convert (type, chrec_a, NULL);
2714 chrec_b = chrec_convert (type, chrec_b, NULL);
2715 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2716
2717 if (eq_evolutions_p (chrec_a, chrec_b))
2718 {
2719 /* Access functions are the same: all the elements are accessed
2720 in the same order. */
2721 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2722 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2723 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2724 dependence_stats.num_miv_dependent++;
2725 }
2726
2727 else if (evolution_function_is_constant_p (difference)
2728 /* For the moment, the following is verified:
2729 evolution_function_is_affine_multivariate_p (chrec_a,
2730 loop_nest->num) */
2731 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2732 {
2733 /* testsuite/.../ssa-chrec-33.c
2734 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2735
2736 The difference is 1, and all the evolution steps are multiples
2737 of 2, consequently there are no overlapping elements. */
2738 *overlaps_a = conflict_fn_no_dependence ();
2739 *overlaps_b = conflict_fn_no_dependence ();
2740 *last_conflicts = integer_zero_node;
2741 dependence_stats.num_miv_independent++;
2742 }
2743
2744 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2745 && !chrec_contains_symbols (chrec_a)
2746 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2747 && !chrec_contains_symbols (chrec_b))
2748 {
2749 /* testsuite/.../ssa-chrec-35.c
2750 {0, +, 1}_2 vs. {0, +, 1}_3
2751 the overlapping elements are respectively located at iterations:
2752 {0, +, 1}_x and {0, +, 1}_x,
2753 in other words, we have the equality:
2754 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2755
2756 Other examples:
2757 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2758 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2759
2760 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2761 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2762 */
2763 analyze_subscript_affine_affine (chrec_a, chrec_b,
2764 overlaps_a, overlaps_b, last_conflicts);
2765
2766 if (CF_NOT_KNOWN_P (*overlaps_a)
2767 || CF_NOT_KNOWN_P (*overlaps_b))
2768 dependence_stats.num_miv_unimplemented++;
2769 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2770 || CF_NO_DEPENDENCE_P (*overlaps_b))
2771 dependence_stats.num_miv_independent++;
2772 else
2773 dependence_stats.num_miv_dependent++;
2774 }
2775
2776 else
2777 {
2778 /* When the analysis is too difficult, answer "don't know". */
2779 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2781
2782 *overlaps_a = conflict_fn_not_known ();
2783 *overlaps_b = conflict_fn_not_known ();
2784 *last_conflicts = chrec_dont_know;
2785 dependence_stats.num_miv_unimplemented++;
2786 }
2787
2788 if (dump_file && (dump_flags & TDF_DETAILS))
2789 fprintf (dump_file, ")\n");
2790 }
2791
2792 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2793 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2794 OVERLAP_ITERATIONS_B are initialized with two functions that
2795 describe the iterations that contain conflicting elements.
2796
2797 Remark: For an integer k >= 0, the following equality is true:
2798
2799 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2800 */
2801
2802 static void
2803 analyze_overlapping_iterations (tree chrec_a,
2804 tree chrec_b,
2805 conflict_function **overlap_iterations_a,
2806 conflict_function **overlap_iterations_b,
2807 tree *last_conflicts, struct loop *loop_nest)
2808 {
2809 unsigned int lnn = loop_nest->num;
2810
2811 dependence_stats.num_subscript_tests++;
2812
2813 if (dump_file && (dump_flags & TDF_DETAILS))
2814 {
2815 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2816 fprintf (dump_file, " (chrec_a = ");
2817 print_generic_expr (dump_file, chrec_a, 0);
2818 fprintf (dump_file, ")\n (chrec_b = ");
2819 print_generic_expr (dump_file, chrec_b, 0);
2820 fprintf (dump_file, ")\n");
2821 }
2822
2823 if (chrec_a == NULL_TREE
2824 || chrec_b == NULL_TREE
2825 || chrec_contains_undetermined (chrec_a)
2826 || chrec_contains_undetermined (chrec_b))
2827 {
2828 dependence_stats.num_subscript_undetermined++;
2829
2830 *overlap_iterations_a = conflict_fn_not_known ();
2831 *overlap_iterations_b = conflict_fn_not_known ();
2832 }
2833
2834 /* If they are the same chrec, and are affine, they overlap
2835 on every iteration. */
2836 else if (eq_evolutions_p (chrec_a, chrec_b)
2837 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2838 || operand_equal_p (chrec_a, chrec_b, 0)))
2839 {
2840 dependence_stats.num_same_subscript_function++;
2841 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2842 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2843 *last_conflicts = chrec_dont_know;
2844 }
2845
2846 /* If they aren't the same, and aren't affine, we can't do anything
2847 yet. */
2848 else if ((chrec_contains_symbols (chrec_a)
2849 || chrec_contains_symbols (chrec_b))
2850 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2851 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2852 {
2853 dependence_stats.num_subscript_undetermined++;
2854 *overlap_iterations_a = conflict_fn_not_known ();
2855 *overlap_iterations_b = conflict_fn_not_known ();
2856 }
2857
2858 else if (ziv_subscript_p (chrec_a, chrec_b))
2859 analyze_ziv_subscript (chrec_a, chrec_b,
2860 overlap_iterations_a, overlap_iterations_b,
2861 last_conflicts);
2862
2863 else if (siv_subscript_p (chrec_a, chrec_b))
2864 analyze_siv_subscript (chrec_a, chrec_b,
2865 overlap_iterations_a, overlap_iterations_b,
2866 last_conflicts, lnn);
2867
2868 else
2869 analyze_miv_subscript (chrec_a, chrec_b,
2870 overlap_iterations_a, overlap_iterations_b,
2871 last_conflicts, loop_nest);
2872
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2874 {
2875 fprintf (dump_file, " (overlap_iterations_a = ");
2876 dump_conflict_function (dump_file, *overlap_iterations_a);
2877 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2878 dump_conflict_function (dump_file, *overlap_iterations_b);
2879 fprintf (dump_file, ")\n");
2880 fprintf (dump_file, ")\n");
2881 }
2882 }
2883
2884 /* Helper function for uniquely inserting distance vectors. */
2885
2886 static void
2887 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2888 {
2889 unsigned i;
2890 lambda_vector v;
2891
2892 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2893 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2894 return;
2895
2896 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2897 }
2898
2899 /* Helper function for uniquely inserting direction vectors. */
2900
2901 static void
2902 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2903 {
2904 unsigned i;
2905 lambda_vector v;
2906
2907 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2908 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2909 return;
2910
2911 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2912 }
2913
2914 /* Add a distance of 1 on all the loops outer than INDEX. If we
2915 haven't yet determined a distance for this outer loop, push a new
2916 distance vector composed of the previous distance, and a distance
2917 of 1 for this outer loop. Example:
2918
2919 | loop_1
2920 | loop_2
2921 | A[10]
2922 | endloop_2
2923 | endloop_1
2924
2925 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2926 save (0, 1), then we have to save (1, 0). */
2927
2928 static void
2929 add_outer_distances (struct data_dependence_relation *ddr,
2930 lambda_vector dist_v, int index)
2931 {
2932 /* For each outer loop where init_v is not set, the accesses are
2933 in dependence of distance 1 in the loop. */
2934 while (--index >= 0)
2935 {
2936 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2937 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2938 save_v[index] = 1;
2939 save_dist_v (ddr, save_v);
2940 }
2941 }
2942
2943 /* Return false when fail to represent the data dependence as a
2944 distance vector. INIT_B is set to true when a component has been
2945 added to the distance vector DIST_V. INDEX_CARRY is then set to
2946 the index in DIST_V that carries the dependence. */
2947
2948 static bool
2949 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2950 struct data_reference *ddr_a,
2951 struct data_reference *ddr_b,
2952 lambda_vector dist_v, bool *init_b,
2953 int *index_carry)
2954 {
2955 unsigned i;
2956 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2957
2958 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2959 {
2960 tree access_fn_a, access_fn_b;
2961 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2962
2963 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2964 {
2965 non_affine_dependence_relation (ddr);
2966 return false;
2967 }
2968
2969 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2970 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2971
2972 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2973 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2974 {
2975 int dist, index;
2976 int var_a = CHREC_VARIABLE (access_fn_a);
2977 int var_b = CHREC_VARIABLE (access_fn_b);
2978
2979 if (var_a != var_b
2980 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2981 {
2982 non_affine_dependence_relation (ddr);
2983 return false;
2984 }
2985
2986 dist = int_cst_value (SUB_DISTANCE (subscript));
2987 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2988 *index_carry = MIN (index, *index_carry);
2989
2990 /* This is the subscript coupling test. If we have already
2991 recorded a distance for this loop (a distance coming from
2992 another subscript), it should be the same. For example,
2993 in the following code, there is no dependence:
2994
2995 | loop i = 0, N, 1
2996 | T[i+1][i] = ...
2997 | ... = T[i][i]
2998 | endloop
2999 */
3000 if (init_v[index] != 0 && dist_v[index] != dist)
3001 {
3002 finalize_ddr_dependent (ddr, chrec_known);
3003 return false;
3004 }
3005
3006 dist_v[index] = dist;
3007 init_v[index] = 1;
3008 *init_b = true;
3009 }
3010 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3011 {
3012 /* This can be for example an affine vs. constant dependence
3013 (T[i] vs. T[3]) that is not an affine dependence and is
3014 not representable as a distance vector. */
3015 non_affine_dependence_relation (ddr);
3016 return false;
3017 }
3018 }
3019
3020 return true;
3021 }
3022
3023 /* Return true when the DDR contains only constant access functions. */
3024
3025 static bool
3026 constant_access_functions (const struct data_dependence_relation *ddr)
3027 {
3028 unsigned i;
3029
3030 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3031 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3032 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3033 return false;
3034
3035 return true;
3036 }
3037
3038 /* Helper function for the case where DDR_A and DDR_B are the same
3039 multivariate access function with a constant step. For an example
3040 see pr34635-1.c. */
3041
3042 static void
3043 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3044 {
3045 int x_1, x_2;
3046 tree c_1 = CHREC_LEFT (c_2);
3047 tree c_0 = CHREC_LEFT (c_1);
3048 lambda_vector dist_v;
3049 int v1, v2, cd;
3050
3051 /* Polynomials with more than 2 variables are not handled yet. When
3052 the evolution steps are parameters, it is not possible to
3053 represent the dependence using classical distance vectors. */
3054 if (TREE_CODE (c_0) != INTEGER_CST
3055 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3056 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3057 {
3058 DDR_AFFINE_P (ddr) = false;
3059 return;
3060 }
3061
3062 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3063 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3064
3065 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3066 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3067 v1 = int_cst_value (CHREC_RIGHT (c_1));
3068 v2 = int_cst_value (CHREC_RIGHT (c_2));
3069 cd = gcd (v1, v2);
3070 v1 /= cd;
3071 v2 /= cd;
3072
3073 if (v2 < 0)
3074 {
3075 v2 = -v2;
3076 v1 = -v1;
3077 }
3078
3079 dist_v[x_1] = v2;
3080 dist_v[x_2] = -v1;
3081 save_dist_v (ddr, dist_v);
3082
3083 add_outer_distances (ddr, dist_v, x_1);
3084 }
3085
3086 /* Helper function for the case where DDR_A and DDR_B are the same
3087 access functions. */
3088
3089 static void
3090 add_other_self_distances (struct data_dependence_relation *ddr)
3091 {
3092 lambda_vector dist_v;
3093 unsigned i;
3094 int index_carry = DDR_NB_LOOPS (ddr);
3095
3096 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3097 {
3098 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3099
3100 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3101 {
3102 if (!evolution_function_is_univariate_p (access_fun))
3103 {
3104 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3105 {
3106 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3107 return;
3108 }
3109
3110 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3111
3112 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3113 add_multivariate_self_dist (ddr, access_fun);
3114 else
3115 /* The evolution step is not constant: it varies in
3116 the outer loop, so this cannot be represented by a
3117 distance vector. For example in pr34635.c the
3118 evolution is {0, +, {0, +, 4}_1}_2. */
3119 DDR_AFFINE_P (ddr) = false;
3120
3121 return;
3122 }
3123
3124 index_carry = MIN (index_carry,
3125 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3126 DDR_LOOP_NEST (ddr)));
3127 }
3128 }
3129
3130 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3131 add_outer_distances (ddr, dist_v, index_carry);
3132 }
3133
3134 static void
3135 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3136 {
3137 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3138
3139 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3140 save_dist_v (ddr, dist_v);
3141 }
3142
3143 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3144 is the case for example when access functions are the same and
3145 equal to a constant, as in:
3146
3147 | loop_1
3148 | A[3] = ...
3149 | ... = A[3]
3150 | endloop_1
3151
3152 in which case the distance vectors are (0) and (1). */
3153
3154 static void
3155 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3156 {
3157 unsigned i, j;
3158
3159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3160 {
3161 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3162 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3163 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3164
3165 for (j = 0; j < ca->n; j++)
3166 if (affine_function_zero_p (ca->fns[j]))
3167 {
3168 insert_innermost_unit_dist_vector (ddr);
3169 return;
3170 }
3171
3172 for (j = 0; j < cb->n; j++)
3173 if (affine_function_zero_p (cb->fns[j]))
3174 {
3175 insert_innermost_unit_dist_vector (ddr);
3176 return;
3177 }
3178 }
3179 }
3180
3181 /* Compute the classic per loop distance vector. DDR is the data
3182 dependence relation to build a vector from. Return false when fail
3183 to represent the data dependence as a distance vector. */
3184
3185 static bool
3186 build_classic_dist_vector (struct data_dependence_relation *ddr,
3187 struct loop *loop_nest)
3188 {
3189 bool init_b = false;
3190 int index_carry = DDR_NB_LOOPS (ddr);
3191 lambda_vector dist_v;
3192
3193 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3194 return false;
3195
3196 if (same_access_functions (ddr))
3197 {
3198 /* Save the 0 vector. */
3199 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3200 save_dist_v (ddr, dist_v);
3201
3202 if (constant_access_functions (ddr))
3203 add_distance_for_zero_overlaps (ddr);
3204
3205 if (DDR_NB_LOOPS (ddr) > 1)
3206 add_other_self_distances (ddr);
3207
3208 return true;
3209 }
3210
3211 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3212 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3213 dist_v, &init_b, &index_carry))
3214 return false;
3215
3216 /* Save the distance vector if we initialized one. */
3217 if (init_b)
3218 {
3219 /* Verify a basic constraint: classic distance vectors should
3220 always be lexicographically positive.
3221
3222 Data references are collected in the order of execution of
3223 the program, thus for the following loop
3224
3225 | for (i = 1; i < 100; i++)
3226 | for (j = 1; j < 100; j++)
3227 | {
3228 | t = T[j+1][i-1]; // A
3229 | T[j][i] = t + 2; // B
3230 | }
3231
3232 references are collected following the direction of the wind:
3233 A then B. The data dependence tests are performed also
3234 following this order, such that we're looking at the distance
3235 separating the elements accessed by A from the elements later
3236 accessed by B. But in this example, the distance returned by
3237 test_dep (A, B) is lexicographically negative (-1, 1), that
3238 means that the access A occurs later than B with respect to
3239 the outer loop, ie. we're actually looking upwind. In this
3240 case we solve test_dep (B, A) looking downwind to the
3241 lexicographically positive solution, that returns the
3242 distance vector (1, -1). */
3243 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3244 {
3245 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3246 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3247 loop_nest))
3248 return false;
3249 compute_subscript_distance (ddr);
3250 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3251 save_v, &init_b, &index_carry))
3252 return false;
3253 save_dist_v (ddr, save_v);
3254 DDR_REVERSED_P (ddr) = true;
3255
3256 /* In this case there is a dependence forward for all the
3257 outer loops:
3258
3259 | for (k = 1; k < 100; k++)
3260 | for (i = 1; i < 100; i++)
3261 | for (j = 1; j < 100; j++)
3262 | {
3263 | t = T[j+1][i-1]; // A
3264 | T[j][i] = t + 2; // B
3265 | }
3266
3267 the vectors are:
3268 (0, 1, -1)
3269 (1, 1, -1)
3270 (1, -1, 1)
3271 */
3272 if (DDR_NB_LOOPS (ddr) > 1)
3273 {
3274 add_outer_distances (ddr, save_v, index_carry);
3275 add_outer_distances (ddr, dist_v, index_carry);
3276 }
3277 }
3278 else
3279 {
3280 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3281 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3282
3283 if (DDR_NB_LOOPS (ddr) > 1)
3284 {
3285 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3286
3287 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3288 DDR_A (ddr), loop_nest))
3289 return false;
3290 compute_subscript_distance (ddr);
3291 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3292 opposite_v, &init_b,
3293 &index_carry))
3294 return false;
3295
3296 save_dist_v (ddr, save_v);
3297 add_outer_distances (ddr, dist_v, index_carry);
3298 add_outer_distances (ddr, opposite_v, index_carry);
3299 }
3300 else
3301 save_dist_v (ddr, save_v);
3302 }
3303 }
3304 else
3305 {
3306 /* There is a distance of 1 on all the outer loops: Example:
3307 there is a dependence of distance 1 on loop_1 for the array A.
3308
3309 | loop_1
3310 | A[5] = ...
3311 | endloop
3312 */
3313 add_outer_distances (ddr, dist_v,
3314 lambda_vector_first_nz (dist_v,
3315 DDR_NB_LOOPS (ddr), 0));
3316 }
3317
3318 if (dump_file && (dump_flags & TDF_DETAILS))
3319 {
3320 unsigned i;
3321
3322 fprintf (dump_file, "(build_classic_dist_vector\n");
3323 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3324 {
3325 fprintf (dump_file, " dist_vector = (");
3326 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3327 DDR_NB_LOOPS (ddr));
3328 fprintf (dump_file, " )\n");
3329 }
3330 fprintf (dump_file, ")\n");
3331 }
3332
3333 return true;
3334 }
3335
3336 /* Return the direction for a given distance.
3337 FIXME: Computing dir this way is suboptimal, since dir can catch
3338 cases that dist is unable to represent. */
3339
3340 static inline enum data_dependence_direction
3341 dir_from_dist (int dist)
3342 {
3343 if (dist > 0)
3344 return dir_positive;
3345 else if (dist < 0)
3346 return dir_negative;
3347 else
3348 return dir_equal;
3349 }
3350
3351 /* Compute the classic per loop direction vector. DDR is the data
3352 dependence relation to build a vector from. */
3353
3354 static void
3355 build_classic_dir_vector (struct data_dependence_relation *ddr)
3356 {
3357 unsigned i, j;
3358 lambda_vector dist_v;
3359
3360 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3361 {
3362 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3363
3364 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3365 dir_v[j] = dir_from_dist (dist_v[j]);
3366
3367 save_dir_v (ddr, dir_v);
3368 }
3369 }
3370
3371 /* Helper function. Returns true when there is a dependence between
3372 data references DRA and DRB. */
3373
3374 static bool
3375 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3376 struct data_reference *dra,
3377 struct data_reference *drb,
3378 struct loop *loop_nest)
3379 {
3380 unsigned int i;
3381 tree last_conflicts;
3382 struct subscript *subscript;
3383
3384 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3385 i++)
3386 {
3387 conflict_function *overlaps_a, *overlaps_b;
3388
3389 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3390 DR_ACCESS_FN (drb, i),
3391 &overlaps_a, &overlaps_b,
3392 &last_conflicts, loop_nest);
3393
3394 if (CF_NOT_KNOWN_P (overlaps_a)
3395 || CF_NOT_KNOWN_P (overlaps_b))
3396 {
3397 finalize_ddr_dependent (ddr, chrec_dont_know);
3398 dependence_stats.num_dependence_undetermined++;
3399 free_conflict_function (overlaps_a);
3400 free_conflict_function (overlaps_b);
3401 return false;
3402 }
3403
3404 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3405 || CF_NO_DEPENDENCE_P (overlaps_b))
3406 {
3407 finalize_ddr_dependent (ddr, chrec_known);
3408 dependence_stats.num_dependence_independent++;
3409 free_conflict_function (overlaps_a);
3410 free_conflict_function (overlaps_b);
3411 return false;
3412 }
3413
3414 else
3415 {
3416 if (SUB_CONFLICTS_IN_A (subscript))
3417 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3418 if (SUB_CONFLICTS_IN_B (subscript))
3419 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3420
3421 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3422 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3423 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3424 }
3425 }
3426
3427 return true;
3428 }
3429
3430 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3431
3432 static void
3433 subscript_dependence_tester (struct data_dependence_relation *ddr,
3434 struct loop *loop_nest)
3435 {
3436
3437 if (dump_file && (dump_flags & TDF_DETAILS))
3438 fprintf (dump_file, "(subscript_dependence_tester \n");
3439
3440 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3441 dependence_stats.num_dependence_dependent++;
3442
3443 compute_subscript_distance (ddr);
3444 if (build_classic_dist_vector (ddr, loop_nest))
3445 build_classic_dir_vector (ddr);
3446
3447 if (dump_file && (dump_flags & TDF_DETAILS))
3448 fprintf (dump_file, ")\n");
3449 }
3450
3451 /* Returns true when all the access functions of A are affine or
3452 constant with respect to LOOP_NEST. */
3453
3454 static bool
3455 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3456 const struct loop *loop_nest)
3457 {
3458 unsigned int i;
3459 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3460 tree t;
3461
3462 FOR_EACH_VEC_ELT (tree, fns, i, t)
3463 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3464 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3465 return false;
3466
3467 return true;
3468 }
3469
3470 /* Initializes an equation for an OMEGA problem using the information
3471 contained in the ACCESS_FUN. Returns true when the operation
3472 succeeded.
3473
3474 PB is the omega constraint system.
3475 EQ is the number of the equation to be initialized.
3476 OFFSET is used for shifting the variables names in the constraints:
3477 a constrain is composed of 2 * the number of variables surrounding
3478 dependence accesses. OFFSET is set either to 0 for the first n variables,
3479 then it is set to n.
3480 ACCESS_FUN is expected to be an affine chrec. */
3481
3482 static bool
3483 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3484 unsigned int offset, tree access_fun,
3485 struct data_dependence_relation *ddr)
3486 {
3487 switch (TREE_CODE (access_fun))
3488 {
3489 case POLYNOMIAL_CHREC:
3490 {
3491 tree left = CHREC_LEFT (access_fun);
3492 tree right = CHREC_RIGHT (access_fun);
3493 int var = CHREC_VARIABLE (access_fun);
3494 unsigned var_idx;
3495
3496 if (TREE_CODE (right) != INTEGER_CST)
3497 return false;
3498
3499 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3500 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3501
3502 /* Compute the innermost loop index. */
3503 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3504
3505 if (offset == 0)
3506 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3507 += int_cst_value (right);
3508
3509 switch (TREE_CODE (left))
3510 {
3511 case POLYNOMIAL_CHREC:
3512 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3513
3514 case INTEGER_CST:
3515 pb->eqs[eq].coef[0] += int_cst_value (left);
3516 return true;
3517
3518 default:
3519 return false;
3520 }
3521 }
3522
3523 case INTEGER_CST:
3524 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3525 return true;
3526
3527 default:
3528 return false;
3529 }
3530 }
3531
3532 /* As explained in the comments preceding init_omega_for_ddr, we have
3533 to set up a system for each loop level, setting outer loops
3534 variation to zero, and current loop variation to positive or zero.
3535 Save each lexico positive distance vector. */
3536
3537 static void
3538 omega_extract_distance_vectors (omega_pb pb,
3539 struct data_dependence_relation *ddr)
3540 {
3541 int eq, geq;
3542 unsigned i, j;
3543 struct loop *loopi, *loopj;
3544 enum omega_result res;
3545
3546 /* Set a new problem for each loop in the nest. The basis is the
3547 problem that we have initialized until now. On top of this we
3548 add new constraints. */
3549 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3550 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3551 {
3552 int dist = 0;
3553 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3554 DDR_NB_LOOPS (ddr));
3555
3556 omega_copy_problem (copy, pb);
3557
3558 /* For all the outer loops "loop_j", add "dj = 0". */
3559 for (j = 0;
3560 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3561 {
3562 eq = omega_add_zero_eq (copy, omega_black);
3563 copy->eqs[eq].coef[j + 1] = 1;
3564 }
3565
3566 /* For "loop_i", add "0 <= di". */
3567 geq = omega_add_zero_geq (copy, omega_black);
3568 copy->geqs[geq].coef[i + 1] = 1;
3569
3570 /* Reduce the constraint system, and test that the current
3571 problem is feasible. */
3572 res = omega_simplify_problem (copy);
3573 if (res == omega_false
3574 || res == omega_unknown
3575 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3576 goto next_problem;
3577
3578 for (eq = 0; eq < copy->num_subs; eq++)
3579 if (copy->subs[eq].key == (int) i + 1)
3580 {
3581 dist = copy->subs[eq].coef[0];
3582 goto found_dist;
3583 }
3584
3585 if (dist == 0)
3586 {
3587 /* Reinitialize problem... */
3588 omega_copy_problem (copy, pb);
3589 for (j = 0;
3590 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3591 {
3592 eq = omega_add_zero_eq (copy, omega_black);
3593 copy->eqs[eq].coef[j + 1] = 1;
3594 }
3595
3596 /* ..., but this time "di = 1". */
3597 eq = omega_add_zero_eq (copy, omega_black);
3598 copy->eqs[eq].coef[i + 1] = 1;
3599 copy->eqs[eq].coef[0] = -1;
3600
3601 res = omega_simplify_problem (copy);
3602 if (res == omega_false
3603 || res == omega_unknown
3604 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3605 goto next_problem;
3606
3607 for (eq = 0; eq < copy->num_subs; eq++)
3608 if (copy->subs[eq].key == (int) i + 1)
3609 {
3610 dist = copy->subs[eq].coef[0];
3611 goto found_dist;
3612 }
3613 }
3614
3615 found_dist:;
3616 /* Save the lexicographically positive distance vector. */
3617 if (dist >= 0)
3618 {
3619 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3620 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3621
3622 dist_v[i] = dist;
3623
3624 for (eq = 0; eq < copy->num_subs; eq++)
3625 if (copy->subs[eq].key > 0)
3626 {
3627 dist = copy->subs[eq].coef[0];
3628 dist_v[copy->subs[eq].key - 1] = dist;
3629 }
3630
3631 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3632 dir_v[j] = dir_from_dist (dist_v[j]);
3633
3634 save_dist_v (ddr, dist_v);
3635 save_dir_v (ddr, dir_v);
3636 }
3637
3638 next_problem:;
3639 omega_free_problem (copy);
3640 }
3641 }
3642
3643 /* This is called for each subscript of a tuple of data references:
3644 insert an equality for representing the conflicts. */
3645
3646 static bool
3647 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3648 struct data_dependence_relation *ddr,
3649 omega_pb pb, bool *maybe_dependent)
3650 {
3651 int eq;
3652 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3653 TREE_TYPE (access_fun_b));
3654 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3655 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3656 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3657 tree minus_one;
3658
3659 /* When the fun_a - fun_b is not constant, the dependence is not
3660 captured by the classic distance vector representation. */
3661 if (TREE_CODE (difference) != INTEGER_CST)
3662 return false;
3663
3664 /* ZIV test. */
3665 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3666 {
3667 /* There is no dependence. */
3668 *maybe_dependent = false;
3669 return true;
3670 }
3671
3672 minus_one = build_int_cst (type, -1);
3673 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3674
3675 eq = omega_add_zero_eq (pb, omega_black);
3676 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3677 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3678 /* There is probably a dependence, but the system of
3679 constraints cannot be built: answer "don't know". */
3680 return false;
3681
3682 /* GCD test. */
3683 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3684 && !int_divides_p (lambda_vector_gcd
3685 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3686 2 * DDR_NB_LOOPS (ddr)),
3687 pb->eqs[eq].coef[0]))
3688 {
3689 /* There is no dependence. */
3690 *maybe_dependent = false;
3691 return true;
3692 }
3693
3694 return true;
3695 }
3696
3697 /* Helper function, same as init_omega_for_ddr but specialized for
3698 data references A and B. */
3699
3700 static bool
3701 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3702 struct data_dependence_relation *ddr,
3703 omega_pb pb, bool *maybe_dependent)
3704 {
3705 unsigned i;
3706 int ineq;
3707 struct loop *loopi;
3708 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3709
3710 /* Insert an equality per subscript. */
3711 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3712 {
3713 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3714 ddr, pb, maybe_dependent))
3715 return false;
3716 else if (*maybe_dependent == false)
3717 {
3718 /* There is no dependence. */
3719 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3720 return true;
3721 }
3722 }
3723
3724 /* Insert inequalities: constraints corresponding to the iteration
3725 domain, i.e. the loops surrounding the references "loop_x" and
3726 the distance variables "dx". The layout of the OMEGA
3727 representation is as follows:
3728 - coef[0] is the constant
3729 - coef[1..nb_loops] are the protected variables that will not be
3730 removed by the solver: the "dx"
3731 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3732 */
3733 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3734 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3735 {
3736 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3737
3738 /* 0 <= loop_x */
3739 ineq = omega_add_zero_geq (pb, omega_black);
3740 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3741
3742 /* 0 <= loop_x + dx */
3743 ineq = omega_add_zero_geq (pb, omega_black);
3744 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3745 pb->geqs[ineq].coef[i + 1] = 1;
3746
3747 if (nbi != -1)
3748 {
3749 /* loop_x <= nb_iters */
3750 ineq = omega_add_zero_geq (pb, omega_black);
3751 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3752 pb->geqs[ineq].coef[0] = nbi;
3753
3754 /* loop_x + dx <= nb_iters */
3755 ineq = omega_add_zero_geq (pb, omega_black);
3756 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3757 pb->geqs[ineq].coef[i + 1] = -1;
3758 pb->geqs[ineq].coef[0] = nbi;
3759
3760 /* A step "dx" bigger than nb_iters is not feasible, so
3761 add "0 <= nb_iters + dx", */
3762 ineq = omega_add_zero_geq (pb, omega_black);
3763 pb->geqs[ineq].coef[i + 1] = 1;
3764 pb->geqs[ineq].coef[0] = nbi;
3765 /* and "dx <= nb_iters". */
3766 ineq = omega_add_zero_geq (pb, omega_black);
3767 pb->geqs[ineq].coef[i + 1] = -1;
3768 pb->geqs[ineq].coef[0] = nbi;
3769 }
3770 }
3771
3772 omega_extract_distance_vectors (pb, ddr);
3773
3774 return true;
3775 }
3776
3777 /* Sets up the Omega dependence problem for the data dependence
3778 relation DDR. Returns false when the constraint system cannot be
3779 built, ie. when the test answers "don't know". Returns true
3780 otherwise, and when independence has been proved (using one of the
3781 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3782 set MAYBE_DEPENDENT to true.
3783
3784 Example: for setting up the dependence system corresponding to the
3785 conflicting accesses
3786
3787 | loop_i
3788 | loop_j
3789 | A[i, i+1] = ...
3790 | ... A[2*j, 2*(i + j)]
3791 | endloop_j
3792 | endloop_i
3793
3794 the following constraints come from the iteration domain:
3795
3796 0 <= i <= Ni
3797 0 <= i + di <= Ni
3798 0 <= j <= Nj
3799 0 <= j + dj <= Nj
3800
3801 where di, dj are the distance variables. The constraints
3802 representing the conflicting elements are:
3803
3804 i = 2 * (j + dj)
3805 i + 1 = 2 * (i + di + j + dj)
3806
3807 For asking that the resulting distance vector (di, dj) be
3808 lexicographically positive, we insert the constraint "di >= 0". If
3809 "di = 0" in the solution, we fix that component to zero, and we
3810 look at the inner loops: we set a new problem where all the outer
3811 loop distances are zero, and fix this inner component to be
3812 positive. When one of the components is positive, we save that
3813 distance, and set a new problem where the distance on this loop is
3814 zero, searching for other distances in the inner loops. Here is
3815 the classic example that illustrates that we have to set for each
3816 inner loop a new problem:
3817
3818 | loop_1
3819 | loop_2
3820 | A[10]
3821 | endloop_2
3822 | endloop_1
3823
3824 we have to save two distances (1, 0) and (0, 1).
3825
3826 Given two array references, refA and refB, we have to set the
3827 dependence problem twice, refA vs. refB and refB vs. refA, and we
3828 cannot do a single test, as refB might occur before refA in the
3829 inner loops, and the contrary when considering outer loops: ex.
3830
3831 | loop_0
3832 | loop_1
3833 | loop_2
3834 | T[{1,+,1}_2][{1,+,1}_1] // refA
3835 | T[{2,+,1}_2][{0,+,1}_1] // refB
3836 | endloop_2
3837 | endloop_1
3838 | endloop_0
3839
3840 refB touches the elements in T before refA, and thus for the same
3841 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3842 but for successive loop_0 iterations, we have (1, -1, 1)
3843
3844 The Omega solver expects the distance variables ("di" in the
3845 previous example) to come first in the constraint system (as
3846 variables to be protected, or "safe" variables), the constraint
3847 system is built using the following layout:
3848
3849 "cst | distance vars | index vars".
3850 */
3851
3852 static bool
3853 init_omega_for_ddr (struct data_dependence_relation *ddr,
3854 bool *maybe_dependent)
3855 {
3856 omega_pb pb;
3857 bool res = false;
3858
3859 *maybe_dependent = true;
3860
3861 if (same_access_functions (ddr))
3862 {
3863 unsigned j;
3864 lambda_vector dir_v;
3865
3866 /* Save the 0 vector. */
3867 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3868 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3869 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3870 dir_v[j] = dir_equal;
3871 save_dir_v (ddr, dir_v);
3872
3873 /* Save the dependences carried by outer loops. */
3874 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3875 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3876 maybe_dependent);
3877 omega_free_problem (pb);
3878 return res;
3879 }
3880
3881 /* Omega expects the protected variables (those that have to be kept
3882 after elimination) to appear first in the constraint system.
3883 These variables are the distance variables. In the following
3884 initialization we declare NB_LOOPS safe variables, and the total
3885 number of variables for the constraint system is 2*NB_LOOPS. */
3886 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3887 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3888 maybe_dependent);
3889 omega_free_problem (pb);
3890
3891 /* Stop computation if not decidable, or no dependence. */
3892 if (res == false || *maybe_dependent == false)
3893 return res;
3894
3895 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3896 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3897 maybe_dependent);
3898 omega_free_problem (pb);
3899
3900 return res;
3901 }
3902
3903 /* Return true when DDR contains the same information as that stored
3904 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3905
3906 static bool
3907 ddr_consistent_p (FILE *file,
3908 struct data_dependence_relation *ddr,
3909 VEC (lambda_vector, heap) *dist_vects,
3910 VEC (lambda_vector, heap) *dir_vects)
3911 {
3912 unsigned int i, j;
3913
3914 /* If dump_file is set, output there. */
3915 if (dump_file && (dump_flags & TDF_DETAILS))
3916 file = dump_file;
3917
3918 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3919 {
3920 lambda_vector b_dist_v;
3921 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3922 VEC_length (lambda_vector, dist_vects),
3923 DDR_NUM_DIST_VECTS (ddr));
3924
3925 fprintf (file, "Banerjee dist vectors:\n");
3926 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3927 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3928
3929 fprintf (file, "Omega dist vectors:\n");
3930 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3931 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3932
3933 fprintf (file, "data dependence relation:\n");
3934 dump_data_dependence_relation (file, ddr);
3935
3936 fprintf (file, ")\n");
3937 return false;
3938 }
3939
3940 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3941 {
3942 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3943 VEC_length (lambda_vector, dir_vects),
3944 DDR_NUM_DIR_VECTS (ddr));
3945 return false;
3946 }
3947
3948 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3949 {
3950 lambda_vector a_dist_v;
3951 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3952
3953 /* Distance vectors are not ordered in the same way in the DDR
3954 and in the DIST_VECTS: search for a matching vector. */
3955 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3956 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3957 break;
3958
3959 if (j == VEC_length (lambda_vector, dist_vects))
3960 {
3961 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3962 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3963 fprintf (file, "not found in Omega dist vectors:\n");
3964 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3965 fprintf (file, "data dependence relation:\n");
3966 dump_data_dependence_relation (file, ddr);
3967 fprintf (file, ")\n");
3968 }
3969 }
3970
3971 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3972 {
3973 lambda_vector a_dir_v;
3974 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3975
3976 /* Direction vectors are not ordered in the same way in the DDR
3977 and in the DIR_VECTS: search for a matching vector. */
3978 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3979 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3980 break;
3981
3982 if (j == VEC_length (lambda_vector, dist_vects))
3983 {
3984 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3985 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3986 fprintf (file, "not found in Omega dir vectors:\n");
3987 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3988 fprintf (file, "data dependence relation:\n");
3989 dump_data_dependence_relation (file, ddr);
3990 fprintf (file, ")\n");
3991 }
3992 }
3993
3994 return true;
3995 }
3996
3997 /* This computes the affine dependence relation between A and B with
3998 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3999 independence between two accesses, while CHREC_DONT_KNOW is used
4000 for representing the unknown relation.
4001
4002 Note that it is possible to stop the computation of the dependence
4003 relation the first time we detect a CHREC_KNOWN element for a given
4004 subscript. */
4005
4006 static void
4007 compute_affine_dependence (struct data_dependence_relation *ddr,
4008 struct loop *loop_nest)
4009 {
4010 struct data_reference *dra = DDR_A (ddr);
4011 struct data_reference *drb = DDR_B (ddr);
4012
4013 if (dump_file && (dump_flags & TDF_DETAILS))
4014 {
4015 fprintf (dump_file, "(compute_affine_dependence\n");
4016 fprintf (dump_file, " (stmt_a = \n");
4017 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4018 fprintf (dump_file, ")\n (stmt_b = \n");
4019 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4020 fprintf (dump_file, ")\n");
4021 }
4022
4023 /* Analyze only when the dependence relation is not yet known. */
4024 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4025 && !DDR_SELF_REFERENCE (ddr))
4026 {
4027 dependence_stats.num_dependence_tests++;
4028
4029 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4030 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4031 {
4032 if (flag_check_data_deps)
4033 {
4034 /* Compute the dependences using the first algorithm. */
4035 subscript_dependence_tester (ddr, loop_nest);
4036
4037 if (dump_file && (dump_flags & TDF_DETAILS))
4038 {
4039 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4040 dump_data_dependence_relation (dump_file, ddr);
4041 }
4042
4043 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4044 {
4045 bool maybe_dependent;
4046 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4047
4048 /* Save the result of the first DD analyzer. */
4049 dist_vects = DDR_DIST_VECTS (ddr);
4050 dir_vects = DDR_DIR_VECTS (ddr);
4051
4052 /* Reset the information. */
4053 DDR_DIST_VECTS (ddr) = NULL;
4054 DDR_DIR_VECTS (ddr) = NULL;
4055
4056 /* Compute the same information using Omega. */
4057 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4058 goto csys_dont_know;
4059
4060 if (dump_file && (dump_flags & TDF_DETAILS))
4061 {
4062 fprintf (dump_file, "Omega Analyzer\n");
4063 dump_data_dependence_relation (dump_file, ddr);
4064 }
4065
4066 /* Check that we get the same information. */
4067 if (maybe_dependent)
4068 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4069 dir_vects));
4070 }
4071 }
4072 else
4073 subscript_dependence_tester (ddr, loop_nest);
4074 }
4075
4076 /* As a last case, if the dependence cannot be determined, or if
4077 the dependence is considered too difficult to determine, answer
4078 "don't know". */
4079 else
4080 {
4081 csys_dont_know:;
4082 dependence_stats.num_dependence_undetermined++;
4083
4084 if (dump_file && (dump_flags & TDF_DETAILS))
4085 {
4086 fprintf (dump_file, "Data ref a:\n");
4087 dump_data_reference (dump_file, dra);
4088 fprintf (dump_file, "Data ref b:\n");
4089 dump_data_reference (dump_file, drb);
4090 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4091 }
4092 finalize_ddr_dependent (ddr, chrec_dont_know);
4093 }
4094 }
4095
4096 if (dump_file && (dump_flags & TDF_DETAILS))
4097 fprintf (dump_file, ")\n");
4098 }
4099
4100 /* This computes the dependence relation for the same data
4101 reference into DDR. */
4102
4103 static void
4104 compute_self_dependence (struct data_dependence_relation *ddr)
4105 {
4106 unsigned int i;
4107 struct subscript *subscript;
4108
4109 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4110 return;
4111
4112 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4113 i++)
4114 {
4115 if (SUB_CONFLICTS_IN_A (subscript))
4116 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4117 if (SUB_CONFLICTS_IN_B (subscript))
4118 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4119
4120 /* The accessed index overlaps for each iteration. */
4121 SUB_CONFLICTS_IN_A (subscript)
4122 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4123 SUB_CONFLICTS_IN_B (subscript)
4124 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4125 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4126 }
4127
4128 /* The distance vector is the zero vector. */
4129 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4130 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4131 }
4132
4133 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4134 the data references in DATAREFS, in the LOOP_NEST. When
4135 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4136 relations. */
4137
4138 void
4139 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4140 VEC (ddr_p, heap) **dependence_relations,
4141 VEC (loop_p, heap) *loop_nest,
4142 bool compute_self_and_rr)
4143 {
4144 struct data_dependence_relation *ddr;
4145 struct data_reference *a, *b;
4146 unsigned int i, j;
4147
4148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4149 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4150 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4151 {
4152 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4153 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4154 if (loop_nest)
4155 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4156 }
4157
4158 if (compute_self_and_rr)
4159 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4160 {
4161 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4162 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4163 compute_self_dependence (ddr);
4164 }
4165 }
4166
4167 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4168 true if STMT clobbers memory, false otherwise. */
4169
4170 bool
4171 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4172 {
4173 bool clobbers_memory = false;
4174 data_ref_loc *ref;
4175 tree *op0, *op1;
4176 enum gimple_code stmt_code = gimple_code (stmt);
4177
4178 *references = NULL;
4179
4180 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4181 Calls have side-effects, except those to const or pure
4182 functions. */
4183 if ((stmt_code == GIMPLE_CALL
4184 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4185 || (stmt_code == GIMPLE_ASM
4186 && gimple_asm_volatile_p (stmt)))
4187 clobbers_memory = true;
4188
4189 if (!gimple_vuse (stmt))
4190 return clobbers_memory;
4191
4192 if (stmt_code == GIMPLE_ASSIGN)
4193 {
4194 tree base;
4195 op0 = gimple_assign_lhs_ptr (stmt);
4196 op1 = gimple_assign_rhs1_ptr (stmt);
4197
4198 if (DECL_P (*op1)
4199 || (REFERENCE_CLASS_P (*op1)
4200 && (base = get_base_address (*op1))
4201 && TREE_CODE (base) != SSA_NAME))
4202 {
4203 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4204 ref->pos = op1;
4205 ref->is_read = true;
4206 }
4207 }
4208 else if (stmt_code == GIMPLE_CALL)
4209 {
4210 unsigned i, n;
4211
4212 op0 = gimple_call_lhs_ptr (stmt);
4213 n = gimple_call_num_args (stmt);
4214 for (i = 0; i < n; i++)
4215 {
4216 op1 = gimple_call_arg_ptr (stmt, i);
4217
4218 if (DECL_P (*op1)
4219 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4220 {
4221 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4222 ref->pos = op1;
4223 ref->is_read = true;
4224 }
4225 }
4226 }
4227 else
4228 return clobbers_memory;
4229
4230 if (*op0
4231 && (DECL_P (*op0)
4232 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4233 {
4234 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4235 ref->pos = op0;
4236 ref->is_read = false;
4237 }
4238 return clobbers_memory;
4239 }
4240
4241 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4242 reference, returns false, otherwise returns true. NEST is the outermost
4243 loop of the loop nest in which the references should be analyzed. */
4244
4245 bool
4246 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4247 VEC (data_reference_p, heap) **datarefs)
4248 {
4249 unsigned i;
4250 VEC (data_ref_loc, heap) *references;
4251 data_ref_loc *ref;
4252 bool ret = true;
4253 data_reference_p dr;
4254
4255 if (get_references_in_stmt (stmt, &references))
4256 {
4257 VEC_free (data_ref_loc, heap, references);
4258 return false;
4259 }
4260
4261 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4262 {
4263 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4264 *ref->pos, stmt, ref->is_read);
4265 gcc_assert (dr != NULL);
4266 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4267 }
4268 VEC_free (data_ref_loc, heap, references);
4269 return ret;
4270 }
4271
4272 /* Stores the data references in STMT to DATAREFS. If there is an
4273 unanalyzable reference, returns false, otherwise returns true.
4274 NEST is the outermost loop of the loop nest in which the references
4275 should be instantiated, LOOP is the loop in which the references
4276 should be analyzed. */
4277
4278 bool
4279 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4280 VEC (data_reference_p, heap) **datarefs)
4281 {
4282 unsigned i;
4283 VEC (data_ref_loc, heap) *references;
4284 data_ref_loc *ref;
4285 bool ret = true;
4286 data_reference_p dr;
4287
4288 if (get_references_in_stmt (stmt, &references))
4289 {
4290 VEC_free (data_ref_loc, heap, references);
4291 return false;
4292 }
4293
4294 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4295 {
4296 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4297 gcc_assert (dr != NULL);
4298 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4299 }
4300
4301 VEC_free (data_ref_loc, heap, references);
4302 return ret;
4303 }
4304
4305 /* Search the data references in LOOP, and record the information into
4306 DATAREFS. Returns chrec_dont_know when failing to analyze a
4307 difficult case, returns NULL_TREE otherwise. */
4308
4309 tree
4310 find_data_references_in_bb (struct loop *loop, basic_block bb,
4311 VEC (data_reference_p, heap) **datarefs)
4312 {
4313 gimple_stmt_iterator bsi;
4314
4315 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4316 {
4317 gimple stmt = gsi_stmt (bsi);
4318
4319 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4320 {
4321 struct data_reference *res;
4322 res = XCNEW (struct data_reference);
4323 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4324
4325 return chrec_dont_know;
4326 }
4327 }
4328
4329 return NULL_TREE;
4330 }
4331
4332 /* Search the data references in LOOP, and record the information into
4333 DATAREFS. Returns chrec_dont_know when failing to analyze a
4334 difficult case, returns NULL_TREE otherwise.
4335
4336 TODO: This function should be made smarter so that it can handle address
4337 arithmetic as if they were array accesses, etc. */
4338
4339 tree
4340 find_data_references_in_loop (struct loop *loop,
4341 VEC (data_reference_p, heap) **datarefs)
4342 {
4343 basic_block bb, *bbs;
4344 unsigned int i;
4345
4346 bbs = get_loop_body_in_dom_order (loop);
4347
4348 for (i = 0; i < loop->num_nodes; i++)
4349 {
4350 bb = bbs[i];
4351
4352 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4353 {
4354 free (bbs);
4355 return chrec_dont_know;
4356 }
4357 }
4358 free (bbs);
4359
4360 return NULL_TREE;
4361 }
4362
4363 /* Recursive helper function. */
4364
4365 static bool
4366 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4367 {
4368 /* Inner loops of the nest should not contain siblings. Example:
4369 when there are two consecutive loops,
4370
4371 | loop_0
4372 | loop_1
4373 | A[{0, +, 1}_1]
4374 | endloop_1
4375 | loop_2
4376 | A[{0, +, 1}_2]
4377 | endloop_2
4378 | endloop_0
4379
4380 the dependence relation cannot be captured by the distance
4381 abstraction. */
4382 if (loop->next)
4383 return false;
4384
4385 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4386 if (loop->inner)
4387 return find_loop_nest_1 (loop->inner, loop_nest);
4388 return true;
4389 }
4390
4391 /* Return false when the LOOP is not well nested. Otherwise return
4392 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4393 contain the loops from the outermost to the innermost, as they will
4394 appear in the classic distance vector. */
4395
4396 bool
4397 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4398 {
4399 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4400 if (loop->inner)
4401 return find_loop_nest_1 (loop->inner, loop_nest);
4402 return true;
4403 }
4404
4405 /* Returns true when the data dependences have been computed, false otherwise.
4406 Given a loop nest LOOP, the following vectors are returned:
4407 DATAREFS is initialized to all the array elements contained in this loop,
4408 DEPENDENCE_RELATIONS contains the relations between the data references.
4409 Compute read-read and self relations if
4410 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4411
4412 bool
4413 compute_data_dependences_for_loop (struct loop *loop,
4414 bool compute_self_and_read_read_dependences,
4415 VEC (loop_p, heap) **loop_nest,
4416 VEC (data_reference_p, heap) **datarefs,
4417 VEC (ddr_p, heap) **dependence_relations)
4418 {
4419 bool res = true;
4420
4421 memset (&dependence_stats, 0, sizeof (dependence_stats));
4422
4423 /* If the loop nest is not well formed, or one of the data references
4424 is not computable, give up without spending time to compute other
4425 dependences. */
4426 if (!loop
4427 || !find_loop_nest (loop, loop_nest)
4428 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4429 {
4430 struct data_dependence_relation *ddr;
4431
4432 /* Insert a single relation into dependence_relations:
4433 chrec_dont_know. */
4434 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4435 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4436 res = false;
4437 }
4438 else
4439 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4440 compute_self_and_read_read_dependences);
4441
4442 if (dump_file && (dump_flags & TDF_STATS))
4443 {
4444 fprintf (dump_file, "Dependence tester statistics:\n");
4445
4446 fprintf (dump_file, "Number of dependence tests: %d\n",
4447 dependence_stats.num_dependence_tests);
4448 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4449 dependence_stats.num_dependence_dependent);
4450 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4451 dependence_stats.num_dependence_independent);
4452 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4453 dependence_stats.num_dependence_undetermined);
4454
4455 fprintf (dump_file, "Number of subscript tests: %d\n",
4456 dependence_stats.num_subscript_tests);
4457 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4458 dependence_stats.num_subscript_undetermined);
4459 fprintf (dump_file, "Number of same subscript function: %d\n",
4460 dependence_stats.num_same_subscript_function);
4461
4462 fprintf (dump_file, "Number of ziv tests: %d\n",
4463 dependence_stats.num_ziv);
4464 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4465 dependence_stats.num_ziv_dependent);
4466 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4467 dependence_stats.num_ziv_independent);
4468 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4469 dependence_stats.num_ziv_unimplemented);
4470
4471 fprintf (dump_file, "Number of siv tests: %d\n",
4472 dependence_stats.num_siv);
4473 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4474 dependence_stats.num_siv_dependent);
4475 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4476 dependence_stats.num_siv_independent);
4477 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4478 dependence_stats.num_siv_unimplemented);
4479
4480 fprintf (dump_file, "Number of miv tests: %d\n",
4481 dependence_stats.num_miv);
4482 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4483 dependence_stats.num_miv_dependent);
4484 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4485 dependence_stats.num_miv_independent);
4486 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4487 dependence_stats.num_miv_unimplemented);
4488 }
4489
4490 return res;
4491 }
4492
4493 /* Returns true when the data dependences for the basic block BB have been
4494 computed, false otherwise.
4495 DATAREFS is initialized to all the array elements contained in this basic
4496 block, DEPENDENCE_RELATIONS contains the relations between the data
4497 references. Compute read-read and self relations if
4498 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4499 bool
4500 compute_data_dependences_for_bb (basic_block bb,
4501 bool compute_self_and_read_read_dependences,
4502 VEC (data_reference_p, heap) **datarefs,
4503 VEC (ddr_p, heap) **dependence_relations)
4504 {
4505 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4506 return false;
4507
4508 compute_all_dependences (*datarefs, dependence_relations, NULL,
4509 compute_self_and_read_read_dependences);
4510 return true;
4511 }
4512
4513 /* Entry point (for testing only). Analyze all the data references
4514 and the dependence relations in LOOP.
4515
4516 The data references are computed first.
4517
4518 A relation on these nodes is represented by a complete graph. Some
4519 of the relations could be of no interest, thus the relations can be
4520 computed on demand.
4521
4522 In the following function we compute all the relations. This is
4523 just a first implementation that is here for:
4524 - for showing how to ask for the dependence relations,
4525 - for the debugging the whole dependence graph,
4526 - for the dejagnu testcases and maintenance.
4527
4528 It is possible to ask only for a part of the graph, avoiding to
4529 compute the whole dependence graph. The computed dependences are
4530 stored in a knowledge base (KB) such that later queries don't
4531 recompute the same information. The implementation of this KB is
4532 transparent to the optimizer, and thus the KB can be changed with a
4533 more efficient implementation, or the KB could be disabled. */
4534 static void
4535 analyze_all_data_dependences (struct loop *loop)
4536 {
4537 unsigned int i;
4538 int nb_data_refs = 10;
4539 VEC (data_reference_p, heap) *datarefs =
4540 VEC_alloc (data_reference_p, heap, nb_data_refs);
4541 VEC (ddr_p, heap) *dependence_relations =
4542 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4543 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4544
4545 /* Compute DDs on the whole function. */
4546 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4547 &dependence_relations);
4548
4549 if (dump_file)
4550 {
4551 dump_data_dependence_relations (dump_file, dependence_relations);
4552 fprintf (dump_file, "\n\n");
4553
4554 if (dump_flags & TDF_DETAILS)
4555 dump_dist_dir_vectors (dump_file, dependence_relations);
4556
4557 if (dump_flags & TDF_STATS)
4558 {
4559 unsigned nb_top_relations = 0;
4560 unsigned nb_bot_relations = 0;
4561 unsigned nb_chrec_relations = 0;
4562 struct data_dependence_relation *ddr;
4563
4564 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4565 {
4566 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4567 nb_top_relations++;
4568
4569 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4570 nb_bot_relations++;
4571
4572 else
4573 nb_chrec_relations++;
4574 }
4575
4576 gather_stats_on_scev_database ();
4577 }
4578 }
4579
4580 VEC_free (loop_p, heap, loop_nest);
4581 free_dependence_relations (dependence_relations);
4582 free_data_refs (datarefs);
4583 }
4584
4585 /* Computes all the data dependences and check that the results of
4586 several analyzers are the same. */
4587
4588 void
4589 tree_check_data_deps (void)
4590 {
4591 loop_iterator li;
4592 struct loop *loop_nest;
4593
4594 FOR_EACH_LOOP (li, loop_nest, 0)
4595 analyze_all_data_dependences (loop_nest);
4596 }
4597
4598 /* Free the memory used by a data dependence relation DDR. */
4599
4600 void
4601 free_dependence_relation (struct data_dependence_relation *ddr)
4602 {
4603 if (ddr == NULL)
4604 return;
4605
4606 if (DDR_SUBSCRIPTS (ddr))
4607 free_subscripts (DDR_SUBSCRIPTS (ddr));
4608 if (DDR_DIST_VECTS (ddr))
4609 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4610 if (DDR_DIR_VECTS (ddr))
4611 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4612
4613 free (ddr);
4614 }
4615
4616 /* Free the memory used by the data dependence relations from
4617 DEPENDENCE_RELATIONS. */
4618
4619 void
4620 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4621 {
4622 unsigned int i;
4623 struct data_dependence_relation *ddr;
4624
4625 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4626 if (ddr)
4627 free_dependence_relation (ddr);
4628
4629 VEC_free (ddr_p, heap, dependence_relations);
4630 }
4631
4632 /* Free the memory used by the data references from DATAREFS. */
4633
4634 void
4635 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4636 {
4637 unsigned int i;
4638 struct data_reference *dr;
4639
4640 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4641 free_data_ref (dr);
4642 VEC_free (data_reference_p, heap, datarefs);
4643 }
4644
4645 \f
4646
4647 /* Dump vertex I in RDG to FILE. */
4648
4649 void
4650 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4651 {
4652 struct vertex *v = &(rdg->vertices[i]);
4653 struct graph_edge *e;
4654
4655 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4656 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4657 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4658
4659 if (v->pred)
4660 for (e = v->pred; e; e = e->pred_next)
4661 fprintf (file, " %d", e->src);
4662
4663 fprintf (file, ") (out:");
4664
4665 if (v->succ)
4666 for (e = v->succ; e; e = e->succ_next)
4667 fprintf (file, " %d", e->dest);
4668
4669 fprintf (file, ")\n");
4670 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4671 fprintf (file, ")\n");
4672 }
4673
4674 /* Call dump_rdg_vertex on stderr. */
4675
4676 DEBUG_FUNCTION void
4677 debug_rdg_vertex (struct graph *rdg, int i)
4678 {
4679 dump_rdg_vertex (stderr, rdg, i);
4680 }
4681
4682 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4683 dumped vertices to that bitmap. */
4684
4685 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4686 {
4687 int i;
4688
4689 fprintf (file, "(%d\n", c);
4690
4691 for (i = 0; i < rdg->n_vertices; i++)
4692 if (rdg->vertices[i].component == c)
4693 {
4694 if (dumped)
4695 bitmap_set_bit (dumped, i);
4696
4697 dump_rdg_vertex (file, rdg, i);
4698 }
4699
4700 fprintf (file, ")\n");
4701 }
4702
4703 /* Call dump_rdg_vertex on stderr. */
4704
4705 DEBUG_FUNCTION void
4706 debug_rdg_component (struct graph *rdg, int c)
4707 {
4708 dump_rdg_component (stderr, rdg, c, NULL);
4709 }
4710
4711 /* Dump the reduced dependence graph RDG to FILE. */
4712
4713 void
4714 dump_rdg (FILE *file, struct graph *rdg)
4715 {
4716 int i;
4717 bitmap dumped = BITMAP_ALLOC (NULL);
4718
4719 fprintf (file, "(rdg\n");
4720
4721 for (i = 0; i < rdg->n_vertices; i++)
4722 if (!bitmap_bit_p (dumped, i))
4723 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4724
4725 fprintf (file, ")\n");
4726 BITMAP_FREE (dumped);
4727 }
4728
4729 /* Call dump_rdg on stderr. */
4730
4731 DEBUG_FUNCTION void
4732 debug_rdg (struct graph *rdg)
4733 {
4734 dump_rdg (stderr, rdg);
4735 }
4736
4737 static void
4738 dot_rdg_1 (FILE *file, struct graph *rdg)
4739 {
4740 int i;
4741
4742 fprintf (file, "digraph RDG {\n");
4743
4744 for (i = 0; i < rdg->n_vertices; i++)
4745 {
4746 struct vertex *v = &(rdg->vertices[i]);
4747 struct graph_edge *e;
4748
4749 /* Highlight reads from memory. */
4750 if (RDG_MEM_READS_STMT (rdg, i))
4751 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4752
4753 /* Highlight stores to memory. */
4754 if (RDG_MEM_WRITE_STMT (rdg, i))
4755 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4756
4757 if (v->succ)
4758 for (e = v->succ; e; e = e->succ_next)
4759 switch (RDGE_TYPE (e))
4760 {
4761 case input_dd:
4762 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4763 break;
4764
4765 case output_dd:
4766 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4767 break;
4768
4769 case flow_dd:
4770 /* These are the most common dependences: don't print these. */
4771 fprintf (file, "%d -> %d \n", i, e->dest);
4772 break;
4773
4774 case anti_dd:
4775 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4776 break;
4777
4778 default:
4779 gcc_unreachable ();
4780 }
4781 }
4782
4783 fprintf (file, "}\n\n");
4784 }
4785
4786 /* Display the Reduced Dependence Graph using dotty. */
4787 extern void dot_rdg (struct graph *);
4788
4789 DEBUG_FUNCTION void
4790 dot_rdg (struct graph *rdg)
4791 {
4792 /* When debugging, enable the following code. This cannot be used
4793 in production compilers because it calls "system". */
4794 #if 0
4795 FILE *file = fopen ("/tmp/rdg.dot", "w");
4796 gcc_assert (file != NULL);
4797
4798 dot_rdg_1 (file, rdg);
4799 fclose (file);
4800
4801 system ("dotty /tmp/rdg.dot &");
4802 #else
4803 dot_rdg_1 (stderr, rdg);
4804 #endif
4805 }
4806
4807 /* This structure is used for recording the mapping statement index in
4808 the RDG. */
4809
4810 struct GTY(()) rdg_vertex_info
4811 {
4812 gimple stmt;
4813 int index;
4814 };
4815
4816 /* Returns the index of STMT in RDG. */
4817
4818 int
4819 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4820 {
4821 struct rdg_vertex_info rvi, *slot;
4822
4823 rvi.stmt = stmt;
4824 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4825
4826 if (!slot)
4827 return -1;
4828
4829 return slot->index;
4830 }
4831
4832 /* Creates an edge in RDG for each distance vector from DDR. The
4833 order that we keep track of in the RDG is the order in which
4834 statements have to be executed. */
4835
4836 static void
4837 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4838 {
4839 struct graph_edge *e;
4840 int va, vb;
4841 data_reference_p dra = DDR_A (ddr);
4842 data_reference_p drb = DDR_B (ddr);
4843 unsigned level = ddr_dependence_level (ddr);
4844
4845 /* For non scalar dependences, when the dependence is REVERSED,
4846 statement B has to be executed before statement A. */
4847 if (level > 0
4848 && !DDR_REVERSED_P (ddr))
4849 {
4850 data_reference_p tmp = dra;
4851 dra = drb;
4852 drb = tmp;
4853 }
4854
4855 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4856 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4857
4858 if (va < 0 || vb < 0)
4859 return;
4860
4861 e = add_edge (rdg, va, vb);
4862 e->data = XNEW (struct rdg_edge);
4863
4864 RDGE_LEVEL (e) = level;
4865 RDGE_RELATION (e) = ddr;
4866
4867 /* Determines the type of the data dependence. */
4868 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4869 RDGE_TYPE (e) = input_dd;
4870 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4871 RDGE_TYPE (e) = output_dd;
4872 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4873 RDGE_TYPE (e) = flow_dd;
4874 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4875 RDGE_TYPE (e) = anti_dd;
4876 }
4877
4878 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4879 the index of DEF in RDG. */
4880
4881 static void
4882 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4883 {
4884 use_operand_p imm_use_p;
4885 imm_use_iterator iterator;
4886
4887 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4888 {
4889 struct graph_edge *e;
4890 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4891
4892 if (use < 0)
4893 continue;
4894
4895 e = add_edge (rdg, idef, use);
4896 e->data = XNEW (struct rdg_edge);
4897 RDGE_TYPE (e) = flow_dd;
4898 RDGE_RELATION (e) = NULL;
4899 }
4900 }
4901
4902 /* Creates the edges of the reduced dependence graph RDG. */
4903
4904 static void
4905 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4906 {
4907 int i;
4908 struct data_dependence_relation *ddr;
4909 def_operand_p def_p;
4910 ssa_op_iter iter;
4911
4912 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4913 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4914 create_rdg_edge_for_ddr (rdg, ddr);
4915
4916 for (i = 0; i < rdg->n_vertices; i++)
4917 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4918 iter, SSA_OP_DEF)
4919 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4920 }
4921
4922 /* Build the vertices of the reduced dependence graph RDG. */
4923
4924 void
4925 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4926 {
4927 int i, j;
4928 gimple stmt;
4929
4930 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4931 {
4932 VEC (data_ref_loc, heap) *references;
4933 data_ref_loc *ref;
4934 struct vertex *v = &(rdg->vertices[i]);
4935 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4936 struct rdg_vertex_info **slot;
4937
4938 rvi->stmt = stmt;
4939 rvi->index = i;
4940 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4941
4942 if (!*slot)
4943 *slot = rvi;
4944 else
4945 free (rvi);
4946
4947 v->data = XNEW (struct rdg_vertex);
4948 RDG_STMT (rdg, i) = stmt;
4949
4950 RDG_MEM_WRITE_STMT (rdg, i) = false;
4951 RDG_MEM_READS_STMT (rdg, i) = false;
4952 if (gimple_code (stmt) == GIMPLE_PHI)
4953 continue;
4954
4955 get_references_in_stmt (stmt, &references);
4956 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4957 if (!ref->is_read)
4958 RDG_MEM_WRITE_STMT (rdg, i) = true;
4959 else
4960 RDG_MEM_READS_STMT (rdg, i) = true;
4961
4962 VEC_free (data_ref_loc, heap, references);
4963 }
4964 }
4965
4966 /* Initialize STMTS with all the statements of LOOP. When
4967 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4968 which we discover statements is important as
4969 generate_loops_for_partition is using the same traversal for
4970 identifying statements. */
4971
4972 static void
4973 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4974 {
4975 unsigned int i;
4976 basic_block *bbs = get_loop_body_in_dom_order (loop);
4977
4978 for (i = 0; i < loop->num_nodes; i++)
4979 {
4980 basic_block bb = bbs[i];
4981 gimple_stmt_iterator bsi;
4982 gimple stmt;
4983
4984 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4985 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4986
4987 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4988 {
4989 stmt = gsi_stmt (bsi);
4990 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4991 VEC_safe_push (gimple, heap, *stmts, stmt);
4992 }
4993 }
4994
4995 free (bbs);
4996 }
4997
4998 /* Returns true when all the dependences are computable. */
4999
5000 static bool
5001 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5002 {
5003 ddr_p ddr;
5004 unsigned int i;
5005
5006 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5007 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5008 return false;
5009
5010 return true;
5011 }
5012
5013 /* Computes a hash function for element ELT. */
5014
5015 static hashval_t
5016 hash_stmt_vertex_info (const void *elt)
5017 {
5018 const struct rdg_vertex_info *const rvi =
5019 (const struct rdg_vertex_info *) elt;
5020 gimple stmt = rvi->stmt;
5021
5022 return htab_hash_pointer (stmt);
5023 }
5024
5025 /* Compares database elements E1 and E2. */
5026
5027 static int
5028 eq_stmt_vertex_info (const void *e1, const void *e2)
5029 {
5030 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5031 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5032
5033 return elt1->stmt == elt2->stmt;
5034 }
5035
5036 /* Free the element E. */
5037
5038 static void
5039 hash_stmt_vertex_del (void *e)
5040 {
5041 free (e);
5042 }
5043
5044 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5045 statement of the loop nest, and one edge per data dependence or
5046 scalar dependence. */
5047
5048 struct graph *
5049 build_empty_rdg (int n_stmts)
5050 {
5051 int nb_data_refs = 10;
5052 struct graph *rdg = new_graph (n_stmts);
5053
5054 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5055 eq_stmt_vertex_info, hash_stmt_vertex_del);
5056 return rdg;
5057 }
5058
5059 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5060 statement of the loop nest, and one edge per data dependence or
5061 scalar dependence. */
5062
5063 struct graph *
5064 build_rdg (struct loop *loop,
5065 VEC (loop_p, heap) **loop_nest,
5066 VEC (ddr_p, heap) **dependence_relations,
5067 VEC (data_reference_p, heap) **datarefs)
5068 {
5069 struct graph *rdg = NULL;
5070 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5071
5072 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5073 dependence_relations);
5074
5075 if (known_dependences_p (*dependence_relations))
5076 {
5077 stmts_from_loop (loop, &stmts);
5078 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5079 create_rdg_vertices (rdg, stmts);
5080 create_rdg_edges (rdg, *dependence_relations);
5081 }
5082
5083 VEC_free (gimple, heap, stmts);
5084 return rdg;
5085 }
5086
5087 /* Free the reduced dependence graph RDG. */
5088
5089 void
5090 free_rdg (struct graph *rdg)
5091 {
5092 int i;
5093
5094 for (i = 0; i < rdg->n_vertices; i++)
5095 {
5096 struct vertex *v = &(rdg->vertices[i]);
5097 struct graph_edge *e;
5098
5099 for (e = v->succ; e; e = e->succ_next)
5100 free (e->data);
5101
5102 free (v->data);
5103 }
5104
5105 htab_delete (rdg->indices);
5106 free_graph (rdg);
5107 }
5108
5109 /* Initialize STMTS with all the statements of LOOP that contain a
5110 store to memory. */
5111
5112 void
5113 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5114 {
5115 unsigned int i;
5116 basic_block *bbs = get_loop_body_in_dom_order (loop);
5117
5118 for (i = 0; i < loop->num_nodes; i++)
5119 {
5120 basic_block bb = bbs[i];
5121 gimple_stmt_iterator bsi;
5122
5123 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5124 if (gimple_vdef (gsi_stmt (bsi)))
5125 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5126 }
5127
5128 free (bbs);
5129 }
5130
5131 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5132 that contains a data reference on its LHS with a stride of the same
5133 size as its unit type. */
5134
5135 bool
5136 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5137 {
5138 tree op0, op1;
5139 bool res;
5140 struct data_reference *dr;
5141
5142 if (!stmt
5143 || !gimple_vdef (stmt)
5144 || !is_gimple_assign (stmt)
5145 || !gimple_assign_single_p (stmt)
5146 || !(op1 = gimple_assign_rhs1 (stmt))
5147 || !(integer_zerop (op1) || real_zerop (op1)))
5148 return false;
5149
5150 dr = XCNEW (struct data_reference);
5151 op0 = gimple_assign_lhs (stmt);
5152
5153 DR_STMT (dr) = stmt;
5154 DR_REF (dr) = op0;
5155
5156 res = dr_analyze_innermost (dr)
5157 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5158
5159 free_data_ref (dr);
5160 return res;
5161 }
5162
5163 /* Initialize STMTS with all the statements of LOOP that contain a
5164 store to memory of the form "A[i] = 0". */
5165
5166 void
5167 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5168 {
5169 unsigned int i;
5170 basic_block bb;
5171 gimple_stmt_iterator si;
5172 gimple stmt;
5173 basic_block *bbs = get_loop_body_in_dom_order (loop);
5174
5175 for (i = 0; i < loop->num_nodes; i++)
5176 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5177 if ((stmt = gsi_stmt (si))
5178 && stmt_with_adjacent_zero_store_dr_p (stmt))
5179 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5180
5181 free (bbs);
5182 }
5183
5184 /* For a data reference REF, return the declaration of its base
5185 address or NULL_TREE if the base is not determined. */
5186
5187 static inline tree
5188 ref_base_address (gimple stmt, data_ref_loc *ref)
5189 {
5190 tree base = NULL_TREE;
5191 tree base_address;
5192 struct data_reference *dr = XCNEW (struct data_reference);
5193
5194 DR_STMT (dr) = stmt;
5195 DR_REF (dr) = *ref->pos;
5196 dr_analyze_innermost (dr);
5197 base_address = DR_BASE_ADDRESS (dr);
5198
5199 if (!base_address)
5200 goto end;
5201
5202 switch (TREE_CODE (base_address))
5203 {
5204 case ADDR_EXPR:
5205 base = TREE_OPERAND (base_address, 0);
5206 break;
5207
5208 default:
5209 base = base_address;
5210 break;
5211 }
5212
5213 end:
5214 free_data_ref (dr);
5215 return base;
5216 }
5217
5218 /* Determines whether the statement from vertex V of the RDG has a
5219 definition used outside the loop that contains this statement. */
5220
5221 bool
5222 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5223 {
5224 gimple stmt = RDG_STMT (rdg, v);
5225 struct loop *loop = loop_containing_stmt (stmt);
5226 use_operand_p imm_use_p;
5227 imm_use_iterator iterator;
5228 ssa_op_iter it;
5229 def_operand_p def_p;
5230
5231 if (!loop)
5232 return true;
5233
5234 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5235 {
5236 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5237 {
5238 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5239 return true;
5240 }
5241 }
5242
5243 return false;
5244 }
5245
5246 /* Determines whether statements S1 and S2 access to similar memory
5247 locations. Two memory accesses are considered similar when they
5248 have the same base address declaration, i.e. when their
5249 ref_base_address is the same. */
5250
5251 bool
5252 have_similar_memory_accesses (gimple s1, gimple s2)
5253 {
5254 bool res = false;
5255 unsigned i, j;
5256 VEC (data_ref_loc, heap) *refs1, *refs2;
5257 data_ref_loc *ref1, *ref2;
5258
5259 get_references_in_stmt (s1, &refs1);
5260 get_references_in_stmt (s2, &refs2);
5261
5262 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5263 {
5264 tree base1 = ref_base_address (s1, ref1);
5265
5266 if (base1)
5267 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5268 if (base1 == ref_base_address (s2, ref2))
5269 {
5270 res = true;
5271 goto end;
5272 }
5273 }
5274
5275 end:
5276 VEC_free (data_ref_loc, heap, refs1);
5277 VEC_free (data_ref_loc, heap, refs2);
5278 return res;
5279 }
5280
5281 /* Helper function for the hashtab. */
5282
5283 static int
5284 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5285 {
5286 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5287 CONST_CAST_GIMPLE ((const_gimple) s2));
5288 }
5289
5290 /* Helper function for the hashtab. */
5291
5292 static hashval_t
5293 ref_base_address_1 (const void *s)
5294 {
5295 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5296 unsigned i;
5297 VEC (data_ref_loc, heap) *refs;
5298 data_ref_loc *ref;
5299 hashval_t res = 0;
5300
5301 get_references_in_stmt (stmt, &refs);
5302
5303 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5304 if (!ref->is_read)
5305 {
5306 res = htab_hash_pointer (ref_base_address (stmt, ref));
5307 break;
5308 }
5309
5310 VEC_free (data_ref_loc, heap, refs);
5311 return res;
5312 }
5313
5314 /* Try to remove duplicated write data references from STMTS. */
5315
5316 void
5317 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5318 {
5319 unsigned i;
5320 gimple stmt;
5321 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5322 have_similar_memory_accesses_1, NULL);
5323
5324 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5325 {
5326 void **slot;
5327
5328 slot = htab_find_slot (seen, stmt, INSERT);
5329
5330 if (*slot)
5331 VEC_ordered_remove (gimple, *stmts, i);
5332 else
5333 {
5334 *slot = (void *) stmt;
5335 i++;
5336 }
5337 }
5338
5339 htab_delete (seen);
5340 }
5341
5342 /* Returns the index of PARAMETER in the parameters vector of the
5343 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5344
5345 int
5346 access_matrix_get_index_for_parameter (tree parameter,
5347 struct access_matrix *access_matrix)
5348 {
5349 int i;
5350 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5351 tree lambda_parameter;
5352
5353 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5354 if (lambda_parameter == parameter)
5355 return i + AM_NB_INDUCTION_VARS (access_matrix);
5356
5357 return -1;
5358 }