]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
2012-05-11 Richard Guenther <rguenther@suse.de>
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
89
90 static struct datadep_stats
91 {
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
96
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
100
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
105
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
110
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
116
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
122
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
125 {
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 }
130
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 int_divides_p (int a, int b)
135 {
136 return ((b % a) == 0);
137 }
138
139 \f
140
141 /* Dump into FILE all the data references from DATAREFS. */
142
143 static void
144 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 {
146 unsigned int i;
147 struct data_reference *dr;
148
149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
150 dump_data_reference (file, dr);
151 }
152
153 /* Dump into STDERR all the data references from DATAREFS. */
154
155 DEBUG_FUNCTION void
156 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 {
158 dump_data_references (stderr, datarefs);
159 }
160
161 /* Print to STDERR the data_reference DR. */
162
163 DEBUG_FUNCTION void
164 debug_data_reference (struct data_reference *dr)
165 {
166 dump_data_reference (stderr, dr);
167 }
168
169 /* Dump function for a DATA_REFERENCE structure. */
170
171 void
172 dump_data_reference (FILE *outf,
173 struct data_reference *dr)
174 {
175 unsigned int i;
176
177 fprintf (outf, "#(Data Ref: \n");
178 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
179 fprintf (outf, "# stmt: ");
180 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
181 fprintf (outf, "# ref: ");
182 print_generic_stmt (outf, DR_REF (dr), 0);
183 fprintf (outf, "# base_object: ");
184 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
185
186 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
187 {
188 fprintf (outf, "# Access function %d: ", i);
189 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
190 }
191 fprintf (outf, "#)\n");
192 }
193
194 /* Dumps the affine function described by FN to the file OUTF. */
195
196 static void
197 dump_affine_function (FILE *outf, affine_fn fn)
198 {
199 unsigned i;
200 tree coef;
201
202 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
203 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
204 {
205 fprintf (outf, " + ");
206 print_generic_expr (outf, coef, TDF_SLIM);
207 fprintf (outf, " * x_%u", i);
208 }
209 }
210
211 /* Dumps the conflict function CF to the file OUTF. */
212
213 static void
214 dump_conflict_function (FILE *outf, conflict_function *cf)
215 {
216 unsigned i;
217
218 if (cf->n == NO_DEPENDENCE)
219 fprintf (outf, "no dependence\n");
220 else if (cf->n == NOT_KNOWN)
221 fprintf (outf, "not known\n");
222 else
223 {
224 for (i = 0; i < cf->n; i++)
225 {
226 fprintf (outf, "[");
227 dump_affine_function (outf, cf->fns[i]);
228 fprintf (outf, "]\n");
229 }
230 }
231 }
232
233 /* Dump function for a SUBSCRIPT structure. */
234
235 static void
236 dump_subscript (FILE *outf, struct subscript *subscript)
237 {
238 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
239
240 fprintf (outf, "\n (subscript \n");
241 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
242 dump_conflict_function (outf, cf);
243 if (CF_NONTRIVIAL_P (cf))
244 {
245 tree last_iteration = SUB_LAST_CONFLICT (subscript);
246 fprintf (outf, " last_conflict: ");
247 print_generic_stmt (outf, last_iteration, 0);
248 }
249
250 cf = SUB_CONFLICTS_IN_B (subscript);
251 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
254 {
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
258 }
259
260 fprintf (outf, " (Subscript distance: ");
261 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
262 fprintf (outf, " )\n");
263 fprintf (outf, " )\n");
264 }
265
266 /* Print the classic direction vector DIRV to OUTF. */
267
268 static void
269 print_direction_vector (FILE *outf,
270 lambda_vector dirv,
271 int length)
272 {
273 int eq;
274
275 for (eq = 0; eq < length; eq++)
276 {
277 enum data_dependence_direction dir = ((enum data_dependence_direction)
278 dirv[eq]);
279
280 switch (dir)
281 {
282 case dir_positive:
283 fprintf (outf, " +");
284 break;
285 case dir_negative:
286 fprintf (outf, " -");
287 break;
288 case dir_equal:
289 fprintf (outf, " =");
290 break;
291 case dir_positive_or_equal:
292 fprintf (outf, " +=");
293 break;
294 case dir_positive_or_negative:
295 fprintf (outf, " +-");
296 break;
297 case dir_negative_or_equal:
298 fprintf (outf, " -=");
299 break;
300 case dir_star:
301 fprintf (outf, " *");
302 break;
303 default:
304 fprintf (outf, "indep");
305 break;
306 }
307 }
308 fprintf (outf, "\n");
309 }
310
311 /* Print a vector of direction vectors. */
312
313 static void
314 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
315 int length)
316 {
317 unsigned j;
318 lambda_vector v;
319
320 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
321 print_direction_vector (outf, v, length);
322 }
323
324 /* Print out a vector VEC of length N to OUTFILE. */
325
326 static inline void
327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328 {
329 int i;
330
331 for (i = 0; i < n; i++)
332 fprintf (outfile, "%3d ", vector[i]);
333 fprintf (outfile, "\n");
334 }
335
336 /* Print a vector of distance vectors. */
337
338 static void
339 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
340 int length)
341 {
342 unsigned j;
343 lambda_vector v;
344
345 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
346 print_lambda_vector (outf, v, length);
347 }
348
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
350
351 static void
352 dump_data_dependence_relation (FILE *outf,
353 struct data_dependence_relation *ddr)
354 {
355 struct data_reference *dra, *drb;
356
357 fprintf (outf, "(Data Dep: \n");
358
359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 {
361 if (ddr)
362 {
363 dra = DDR_A (ddr);
364 drb = DDR_B (ddr);
365 if (dra)
366 dump_data_reference (outf, dra);
367 else
368 fprintf (outf, " (nil)\n");
369 if (drb)
370 dump_data_reference (outf, drb);
371 else
372 fprintf (outf, " (nil)\n");
373 }
374 fprintf (outf, " (don't know)\n)\n");
375 return;
376 }
377
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
380 dump_data_reference (outf, dra);
381 dump_data_reference (outf, drb);
382
383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384 fprintf (outf, " (no dependence)\n");
385
386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
387 {
388 unsigned int i;
389 struct loop *loopi;
390
391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 {
393 fprintf (outf, " access_fn_A: ");
394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 fprintf (outf, " access_fn_B: ");
396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
398 }
399
400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401 fprintf (outf, " loop nest: (");
402 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
403 fprintf (outf, "%d ", loopi->num);
404 fprintf (outf, ")\n");
405
406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
407 {
408 fprintf (outf, " distance_vector: ");
409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
411 }
412
413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
414 {
415 fprintf (outf, " direction_vector: ");
416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 DDR_NB_LOOPS (ddr));
418 }
419 }
420
421 fprintf (outf, ")\n");
422 }
423
424 /* Debug version. */
425
426 DEBUG_FUNCTION void
427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
428 {
429 dump_data_dependence_relation (stderr, ddr);
430 }
431
432 /* Dump into FILE all the dependence relations from DDRS. */
433
434 void
435 dump_data_dependence_relations (FILE *file,
436 VEC (ddr_p, heap) *ddrs)
437 {
438 unsigned int i;
439 struct data_dependence_relation *ddr;
440
441 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
442 dump_data_dependence_relation (file, ddr);
443 }
444
445 /* Dump to STDERR all the dependence relations from DDRS. */
446
447 DEBUG_FUNCTION void
448 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
449 {
450 dump_data_dependence_relations (stderr, ddrs);
451 }
452
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
456 considered nest. */
457
458 static void
459 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
460 {
461 unsigned int i, j;
462 struct data_dependence_relation *ddr;
463 lambda_vector v;
464
465 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467 {
468 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
469 {
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
473 }
474
475 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
476 {
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
480 }
481 }
482
483 fprintf (file, "\n\n");
484 }
485
486 /* Dumps the data dependence relations DDRS in FILE. */
487
488 static void
489 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
490 {
491 unsigned int i;
492 struct data_dependence_relation *ddr;
493
494 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
495 dump_data_dependence_relation (file, ddr);
496
497 fprintf (file, "\n\n");
498 }
499
500 DEBUG_FUNCTION void
501 debug_ddrs (VEC (ddr_p, heap) *ddrs)
502 {
503 dump_ddrs (stderr, ddrs);
504 }
505
506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510 is returned. */
511
512 static bool
513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 tree *var, tree *off)
515 {
516 tree var0, var1;
517 tree off0, off1;
518 enum tree_code ocode = code;
519
520 *var = NULL_TREE;
521 *off = NULL_TREE;
522
523 switch (code)
524 {
525 case INTEGER_CST:
526 *var = build_int_cst (type, 0);
527 *off = fold_convert (ssizetype, op0);
528 return true;
529
530 case POINTER_PLUS_EXPR:
531 ocode = PLUS_EXPR;
532 /* FALLTHROUGH */
533 case PLUS_EXPR:
534 case MINUS_EXPR:
535 split_constant_offset (op0, &var0, &off0);
536 split_constant_offset (op1, &var1, &off1);
537 *var = fold_build2 (code, type, var0, var1);
538 *off = size_binop (ocode, off0, off1);
539 return true;
540
541 case MULT_EXPR:
542 if (TREE_CODE (op1) != INTEGER_CST)
543 return false;
544
545 split_constant_offset (op0, &var0, &off0);
546 *var = fold_build2 (MULT_EXPR, type, var0, op1);
547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548 return true;
549
550 case ADDR_EXPR:
551 {
552 tree base, poffset;
553 HOST_WIDE_INT pbitsize, pbitpos;
554 enum machine_mode pmode;
555 int punsignedp, pvolatilep;
556
557 op0 = TREE_OPERAND (op0, 0);
558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 &pmode, &punsignedp, &pvolatilep, false);
560
561 if (pbitpos % BITS_PER_UNIT != 0)
562 return false;
563 base = build_fold_addr_expr (base);
564 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
565
566 if (poffset)
567 {
568 split_constant_offset (poffset, &poffset, &off1);
569 off0 = size_binop (PLUS_EXPR, off0, off1);
570 if (POINTER_TYPE_P (TREE_TYPE (base)))
571 base = fold_build_pointer_plus (base, poffset);
572 else
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
575 }
576
577 var0 = fold_convert (type, base);
578
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (CONVERT_EXPR_P (var0))
585 {
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
590 } */
591 while (POINTER_TYPE_P (type))
592 type = TREE_TYPE (type);
593 if (int_size_in_bytes (type) < 0)
594 return false;
595
596 *var = var0;
597 *off = off0;
598 return true;
599 }
600
601 case SSA_NAME:
602 {
603 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 enum tree_code subcode;
605
606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 return false;
608
609 var0 = gimple_assign_rhs1 (def_stmt);
610 subcode = gimple_assign_rhs_code (def_stmt);
611 var1 = gimple_assign_rhs2 (def_stmt);
612
613 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
614 }
615 CASE_CONVERT:
616 {
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype = TREE_TYPE (op0);
622 if ((POINTER_TYPE_P (itype)
623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 {
627 split_constant_offset (op0, &var0, off);
628 *var = fold_convert (type, var0);
629 return true;
630 }
631 return false;
632 }
633
634 default:
635 return false;
636 }
637 }
638
639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
641
642 void
643 split_constant_offset (tree exp, tree *var, tree *off)
644 {
645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646 enum tree_code code;
647
648 *var = exp;
649 *off = ssize_int (0);
650 STRIP_NOPS (exp);
651
652 if (tree_is_chrec (exp)
653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654 return;
655
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660 {
661 *var = fold_convert (type, e);
662 *off = o;
663 }
664 }
665
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
668
669 static tree
670 canonicalize_base_object_address (tree addr)
671 {
672 tree orig = addr;
673
674 STRIP_NOPS (addr);
675
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
680
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
683
684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
685 }
686
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
689 otherwise. */
690
691 bool
692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
693 {
694 gimple stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
697 HOST_WIDE_INT pbitsize, pbitpos;
698 tree base, poffset;
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
703 bool in_loop = (loop && loop->num);
704
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
707
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
711
712 if (pbitpos % BITS_PER_UNIT != 0)
713 {
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
716 return false;
717 }
718
719 if (TREE_CODE (base) == MEM_REF)
720 {
721 if (!integer_zerop (TREE_OPERAND (base, 1)))
722 {
723 if (!poffset)
724 {
725 double_int moff = mem_ref_offset (base);
726 poffset = double_int_to_tree (sizetype, moff);
727 }
728 else
729 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
730 }
731 base = TREE_OPERAND (base, 0);
732 }
733 else
734 base = build_fold_addr_expr (base);
735
736 if (in_loop)
737 {
738 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
739 nest ? true : false))
740 {
741 if (nest)
742 {
743 if (dump_file && (dump_flags & TDF_DETAILS))
744 fprintf (dump_file, "failed: evolution of base is not"
745 " affine.\n");
746 return false;
747 }
748 else
749 {
750 base_iv.base = base;
751 base_iv.step = ssize_int (0);
752 base_iv.no_overflow = true;
753 }
754 }
755 }
756 else
757 {
758 base_iv.base = base;
759 base_iv.step = ssize_int (0);
760 base_iv.no_overflow = true;
761 }
762
763 if (!poffset)
764 {
765 offset_iv.base = ssize_int (0);
766 offset_iv.step = ssize_int (0);
767 }
768 else
769 {
770 if (!in_loop)
771 {
772 offset_iv.base = poffset;
773 offset_iv.step = ssize_int (0);
774 }
775 else if (!simple_iv (loop, loop_containing_stmt (stmt),
776 poffset, &offset_iv,
777 nest ? true : false))
778 {
779 if (nest)
780 {
781 if (dump_file && (dump_flags & TDF_DETAILS))
782 fprintf (dump_file, "failed: evolution of offset is not"
783 " affine.\n");
784 return false;
785 }
786 else
787 {
788 offset_iv.base = poffset;
789 offset_iv.step = ssize_int (0);
790 }
791 }
792 }
793
794 init = ssize_int (pbitpos / BITS_PER_UNIT);
795 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
796 init = size_binop (PLUS_EXPR, init, dinit);
797 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
798 init = size_binop (PLUS_EXPR, init, dinit);
799
800 step = size_binop (PLUS_EXPR,
801 fold_convert (ssizetype, base_iv.step),
802 fold_convert (ssizetype, offset_iv.step));
803
804 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
805
806 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
807 DR_INIT (dr) = init;
808 DR_STEP (dr) = step;
809
810 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
811
812 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, "success.\n");
814
815 return true;
816 }
817
818 /* Determines the base object and the list of indices of memory reference
819 DR, analyzed in LOOP and instantiated in loop nest NEST. */
820
821 static void
822 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
823 {
824 VEC (tree, heap) *access_fns = NULL;
825 tree ref, op;
826 tree base, off, access_fn;
827 basic_block before_loop;
828
829 /* If analyzing a basic-block there are no indices to analyze
830 and thus no access functions. */
831 if (!nest)
832 {
833 DR_BASE_OBJECT (dr) = DR_REF (dr);
834 DR_ACCESS_FNS (dr) = NULL;
835 return;
836 }
837
838 ref = DR_REF (dr);
839 before_loop = block_before_loop (nest);
840
841 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
842 into a two element array with a constant index. The base is
843 then just the immediate underlying object. */
844 if (TREE_CODE (ref) == REALPART_EXPR)
845 {
846 ref = TREE_OPERAND (ref, 0);
847 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
848 }
849 else if (TREE_CODE (ref) == IMAGPART_EXPR)
850 {
851 ref = TREE_OPERAND (ref, 0);
852 VEC_safe_push (tree, heap, access_fns, integer_one_node);
853 }
854
855 /* Analyze access functions of dimensions we know to be independent. */
856 while (handled_component_p (ref))
857 {
858 if (TREE_CODE (ref) == ARRAY_REF)
859 {
860 op = TREE_OPERAND (ref, 1);
861 access_fn = analyze_scalar_evolution (loop, op);
862 access_fn = instantiate_scev (before_loop, loop, access_fn);
863 VEC_safe_push (tree, heap, access_fns, access_fn);
864 }
865 else if (TREE_CODE (ref) == COMPONENT_REF
866 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
867 {
868 /* For COMPONENT_REFs of records (but not unions!) use the
869 FIELD_DECL offset as constant access function so we can
870 disambiguate a[i].f1 and a[i].f2. */
871 tree off = component_ref_field_offset (ref);
872 off = size_binop (PLUS_EXPR,
873 size_binop (MULT_EXPR,
874 fold_convert (bitsizetype, off),
875 bitsize_int (BITS_PER_UNIT)),
876 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
877 VEC_safe_push (tree, heap, access_fns, off);
878 }
879 else
880 /* If we have an unhandled component we could not translate
881 to an access function stop analyzing. We have determined
882 our base object in this case. */
883 break;
884
885 ref = TREE_OPERAND (ref, 0);
886 }
887
888 /* If the address operand of a MEM_REF base has an evolution in the
889 analyzed nest, add it as an additional independent access-function. */
890 if (TREE_CODE (ref) == MEM_REF)
891 {
892 op = TREE_OPERAND (ref, 0);
893 access_fn = analyze_scalar_evolution (loop, op);
894 access_fn = instantiate_scev (before_loop, loop, access_fn);
895 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
896 {
897 tree orig_type;
898 tree memoff = TREE_OPERAND (ref, 1);
899 base = initial_condition (access_fn);
900 orig_type = TREE_TYPE (base);
901 STRIP_USELESS_TYPE_CONVERSION (base);
902 split_constant_offset (base, &base, &off);
903 /* Fold the MEM_REF offset into the evolutions initial
904 value to make more bases comparable. */
905 if (!integer_zerop (memoff))
906 {
907 off = size_binop (PLUS_EXPR, off,
908 fold_convert (ssizetype, memoff));
909 memoff = build_int_cst (TREE_TYPE (memoff), 0);
910 }
911 access_fn = chrec_replace_initial_condition
912 (access_fn, fold_convert (orig_type, off));
913 /* ??? This is still not a suitable base object for
914 dr_may_alias_p - the base object needs to be an
915 access that covers the object as whole. With
916 an evolution in the pointer this cannot be
917 guaranteed.
918 As a band-aid, mark the access so we can special-case
919 it in dr_may_alias_p. */
920 ref = fold_build2_loc (EXPR_LOCATION (ref),
921 MEM_REF, TREE_TYPE (ref),
922 base, memoff);
923 DR_UNCONSTRAINED_BASE (dr) = true;
924 VEC_safe_push (tree, heap, access_fns, access_fn);
925 }
926 }
927 else if (DECL_P (ref))
928 {
929 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
930 ref = build2 (MEM_REF, TREE_TYPE (ref),
931 build_fold_addr_expr (ref),
932 build_int_cst (reference_alias_ptr_type (ref), 0));
933 }
934
935 DR_BASE_OBJECT (dr) = ref;
936 DR_ACCESS_FNS (dr) = access_fns;
937 }
938
939 /* Extracts the alias analysis information from the memory reference DR. */
940
941 static void
942 dr_analyze_alias (struct data_reference *dr)
943 {
944 tree ref = DR_REF (dr);
945 tree base = get_base_address (ref), addr;
946
947 if (INDIRECT_REF_P (base)
948 || TREE_CODE (base) == MEM_REF)
949 {
950 addr = TREE_OPERAND (base, 0);
951 if (TREE_CODE (addr) == SSA_NAME)
952 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
953 }
954 }
955
956 /* Frees data reference DR. */
957
958 void
959 free_data_ref (data_reference_p dr)
960 {
961 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
962 free (dr);
963 }
964
965 /* Analyzes memory reference MEMREF accessed in STMT. The reference
966 is read if IS_READ is true, write otherwise. Returns the
967 data_reference description of MEMREF. NEST is the outermost loop
968 in which the reference should be instantiated, LOOP is the loop in
969 which the data reference should be analyzed. */
970
971 struct data_reference *
972 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
973 bool is_read)
974 {
975 struct data_reference *dr;
976
977 if (dump_file && (dump_flags & TDF_DETAILS))
978 {
979 fprintf (dump_file, "Creating dr for ");
980 print_generic_expr (dump_file, memref, TDF_SLIM);
981 fprintf (dump_file, "\n");
982 }
983
984 dr = XCNEW (struct data_reference);
985 DR_STMT (dr) = stmt;
986 DR_REF (dr) = memref;
987 DR_IS_READ (dr) = is_read;
988
989 dr_analyze_innermost (dr, nest);
990 dr_analyze_indices (dr, nest, loop);
991 dr_analyze_alias (dr);
992
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 {
995 unsigned i;
996 fprintf (dump_file, "\tbase_address: ");
997 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
998 fprintf (dump_file, "\n\toffset from base address: ");
999 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1000 fprintf (dump_file, "\n\tconstant offset from base address: ");
1001 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1002 fprintf (dump_file, "\n\tstep: ");
1003 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1004 fprintf (dump_file, "\n\taligned to: ");
1005 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1006 fprintf (dump_file, "\n\tbase_object: ");
1007 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1008 fprintf (dump_file, "\n");
1009 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1010 {
1011 fprintf (dump_file, "\tAccess function %d: ", i);
1012 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1013 }
1014 }
1015
1016 return dr;
1017 }
1018
1019 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1020 expressions. */
1021 static bool
1022 dr_equal_offsets_p1 (tree offset1, tree offset2)
1023 {
1024 bool res;
1025
1026 STRIP_NOPS (offset1);
1027 STRIP_NOPS (offset2);
1028
1029 if (offset1 == offset2)
1030 return true;
1031
1032 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1033 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1034 return false;
1035
1036 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1037 TREE_OPERAND (offset2, 0));
1038
1039 if (!res || !BINARY_CLASS_P (offset1))
1040 return res;
1041
1042 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1043 TREE_OPERAND (offset2, 1));
1044
1045 return res;
1046 }
1047
1048 /* Check if DRA and DRB have equal offsets. */
1049 bool
1050 dr_equal_offsets_p (struct data_reference *dra,
1051 struct data_reference *drb)
1052 {
1053 tree offset1, offset2;
1054
1055 offset1 = DR_OFFSET (dra);
1056 offset2 = DR_OFFSET (drb);
1057
1058 return dr_equal_offsets_p1 (offset1, offset2);
1059 }
1060
1061 /* Returns true if FNA == FNB. */
1062
1063 static bool
1064 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1065 {
1066 unsigned i, n = VEC_length (tree, fna);
1067
1068 if (n != VEC_length (tree, fnb))
1069 return false;
1070
1071 for (i = 0; i < n; i++)
1072 if (!operand_equal_p (VEC_index (tree, fna, i),
1073 VEC_index (tree, fnb, i), 0))
1074 return false;
1075
1076 return true;
1077 }
1078
1079 /* If all the functions in CF are the same, returns one of them,
1080 otherwise returns NULL. */
1081
1082 static affine_fn
1083 common_affine_function (conflict_function *cf)
1084 {
1085 unsigned i;
1086 affine_fn comm;
1087
1088 if (!CF_NONTRIVIAL_P (cf))
1089 return NULL;
1090
1091 comm = cf->fns[0];
1092
1093 for (i = 1; i < cf->n; i++)
1094 if (!affine_function_equal_p (comm, cf->fns[i]))
1095 return NULL;
1096
1097 return comm;
1098 }
1099
1100 /* Returns the base of the affine function FN. */
1101
1102 static tree
1103 affine_function_base (affine_fn fn)
1104 {
1105 return VEC_index (tree, fn, 0);
1106 }
1107
1108 /* Returns true if FN is a constant. */
1109
1110 static bool
1111 affine_function_constant_p (affine_fn fn)
1112 {
1113 unsigned i;
1114 tree coef;
1115
1116 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1117 if (!integer_zerop (coef))
1118 return false;
1119
1120 return true;
1121 }
1122
1123 /* Returns true if FN is the zero constant function. */
1124
1125 static bool
1126 affine_function_zero_p (affine_fn fn)
1127 {
1128 return (integer_zerop (affine_function_base (fn))
1129 && affine_function_constant_p (fn));
1130 }
1131
1132 /* Returns a signed integer type with the largest precision from TA
1133 and TB. */
1134
1135 static tree
1136 signed_type_for_types (tree ta, tree tb)
1137 {
1138 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1139 return signed_type_for (ta);
1140 else
1141 return signed_type_for (tb);
1142 }
1143
1144 /* Applies operation OP on affine functions FNA and FNB, and returns the
1145 result. */
1146
1147 static affine_fn
1148 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1149 {
1150 unsigned i, n, m;
1151 affine_fn ret;
1152 tree coef;
1153
1154 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1155 {
1156 n = VEC_length (tree, fna);
1157 m = VEC_length (tree, fnb);
1158 }
1159 else
1160 {
1161 n = VEC_length (tree, fnb);
1162 m = VEC_length (tree, fna);
1163 }
1164
1165 ret = VEC_alloc (tree, heap, m);
1166 for (i = 0; i < n; i++)
1167 {
1168 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1169 TREE_TYPE (VEC_index (tree, fnb, i)));
1170
1171 VEC_quick_push (tree, ret,
1172 fold_build2 (op, type,
1173 VEC_index (tree, fna, i),
1174 VEC_index (tree, fnb, i)));
1175 }
1176
1177 for (; VEC_iterate (tree, fna, i, coef); i++)
1178 VEC_quick_push (tree, ret,
1179 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1180 coef, integer_zero_node));
1181 for (; VEC_iterate (tree, fnb, i, coef); i++)
1182 VEC_quick_push (tree, ret,
1183 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1184 integer_zero_node, coef));
1185
1186 return ret;
1187 }
1188
1189 /* Returns the sum of affine functions FNA and FNB. */
1190
1191 static affine_fn
1192 affine_fn_plus (affine_fn fna, affine_fn fnb)
1193 {
1194 return affine_fn_op (PLUS_EXPR, fna, fnb);
1195 }
1196
1197 /* Returns the difference of affine functions FNA and FNB. */
1198
1199 static affine_fn
1200 affine_fn_minus (affine_fn fna, affine_fn fnb)
1201 {
1202 return affine_fn_op (MINUS_EXPR, fna, fnb);
1203 }
1204
1205 /* Frees affine function FN. */
1206
1207 static void
1208 affine_fn_free (affine_fn fn)
1209 {
1210 VEC_free (tree, heap, fn);
1211 }
1212
1213 /* Determine for each subscript in the data dependence relation DDR
1214 the distance. */
1215
1216 static void
1217 compute_subscript_distance (struct data_dependence_relation *ddr)
1218 {
1219 conflict_function *cf_a, *cf_b;
1220 affine_fn fn_a, fn_b, diff;
1221
1222 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1223 {
1224 unsigned int i;
1225
1226 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1227 {
1228 struct subscript *subscript;
1229
1230 subscript = DDR_SUBSCRIPT (ddr, i);
1231 cf_a = SUB_CONFLICTS_IN_A (subscript);
1232 cf_b = SUB_CONFLICTS_IN_B (subscript);
1233
1234 fn_a = common_affine_function (cf_a);
1235 fn_b = common_affine_function (cf_b);
1236 if (!fn_a || !fn_b)
1237 {
1238 SUB_DISTANCE (subscript) = chrec_dont_know;
1239 return;
1240 }
1241 diff = affine_fn_minus (fn_a, fn_b);
1242
1243 if (affine_function_constant_p (diff))
1244 SUB_DISTANCE (subscript) = affine_function_base (diff);
1245 else
1246 SUB_DISTANCE (subscript) = chrec_dont_know;
1247
1248 affine_fn_free (diff);
1249 }
1250 }
1251 }
1252
1253 /* Returns the conflict function for "unknown". */
1254
1255 static conflict_function *
1256 conflict_fn_not_known (void)
1257 {
1258 conflict_function *fn = XCNEW (conflict_function);
1259 fn->n = NOT_KNOWN;
1260
1261 return fn;
1262 }
1263
1264 /* Returns the conflict function for "independent". */
1265
1266 static conflict_function *
1267 conflict_fn_no_dependence (void)
1268 {
1269 conflict_function *fn = XCNEW (conflict_function);
1270 fn->n = NO_DEPENDENCE;
1271
1272 return fn;
1273 }
1274
1275 /* Returns true if the address of OBJ is invariant in LOOP. */
1276
1277 static bool
1278 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1279 {
1280 while (handled_component_p (obj))
1281 {
1282 if (TREE_CODE (obj) == ARRAY_REF)
1283 {
1284 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1285 need to check the stride and the lower bound of the reference. */
1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 loop->num)
1288 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1289 loop->num))
1290 return false;
1291 }
1292 else if (TREE_CODE (obj) == COMPONENT_REF)
1293 {
1294 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1295 loop->num))
1296 return false;
1297 }
1298 obj = TREE_OPERAND (obj, 0);
1299 }
1300
1301 if (!INDIRECT_REF_P (obj)
1302 && TREE_CODE (obj) != MEM_REF)
1303 return true;
1304
1305 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1306 loop->num);
1307 }
1308
1309 /* Returns false if we can prove that data references A and B do not alias,
1310 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1311 considered. */
1312
1313 bool
1314 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1315 bool loop_nest)
1316 {
1317 tree addr_a = DR_BASE_OBJECT (a);
1318 tree addr_b = DR_BASE_OBJECT (b);
1319
1320 /* If we are not processing a loop nest but scalar code we
1321 do not need to care about possible cross-iteration dependences
1322 and thus can process the full original reference. Do so,
1323 similar to how loop invariant motion applies extra offset-based
1324 disambiguation. */
1325 if (!loop_nest)
1326 {
1327 aff_tree off1, off2;
1328 double_int size1, size2;
1329 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1330 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1331 aff_combination_scale (&off1, double_int_minus_one);
1332 aff_combination_add (&off2, &off1);
1333 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1334 return false;
1335 }
1336
1337 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1338 the size of the base-object. So we cannot do any offset/overlap
1339 based analysis but have to rely on points-to information only. */
1340 if (TREE_CODE (addr_a) == MEM_REF
1341 && DR_UNCONSTRAINED_BASE (a))
1342 {
1343 if (TREE_CODE (addr_b) == MEM_REF
1344 && DR_UNCONSTRAINED_BASE (b))
1345 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1346 TREE_OPERAND (addr_b, 0));
1347 else
1348 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1349 build_fold_addr_expr (addr_b));
1350 }
1351 else if (TREE_CODE (addr_b) == MEM_REF
1352 && DR_UNCONSTRAINED_BASE (b))
1353 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1354 TREE_OPERAND (addr_b, 0));
1355
1356 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1357 that is being subsetted in the loop nest. */
1358 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1359 return refs_output_dependent_p (addr_a, addr_b);
1360 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1361 return refs_anti_dependent_p (addr_a, addr_b);
1362 return refs_may_alias_p (addr_a, addr_b);
1363 }
1364
1365 /* Initialize a data dependence relation between data accesses A and
1366 B. NB_LOOPS is the number of loops surrounding the references: the
1367 size of the classic distance/direction vectors. */
1368
1369 struct data_dependence_relation *
1370 initialize_data_dependence_relation (struct data_reference *a,
1371 struct data_reference *b,
1372 VEC (loop_p, heap) *loop_nest)
1373 {
1374 struct data_dependence_relation *res;
1375 unsigned int i;
1376
1377 res = XNEW (struct data_dependence_relation);
1378 DDR_A (res) = a;
1379 DDR_B (res) = b;
1380 DDR_LOOP_NEST (res) = NULL;
1381 DDR_REVERSED_P (res) = false;
1382 DDR_SUBSCRIPTS (res) = NULL;
1383 DDR_DIR_VECTS (res) = NULL;
1384 DDR_DIST_VECTS (res) = NULL;
1385
1386 if (a == NULL || b == NULL)
1387 {
1388 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1389 return res;
1390 }
1391
1392 /* If the data references do not alias, then they are independent. */
1393 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1394 {
1395 DDR_ARE_DEPENDENT (res) = chrec_known;
1396 return res;
1397 }
1398
1399 /* The case where the references are exactly the same. */
1400 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1401 {
1402 if (loop_nest
1403 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1404 DR_BASE_OBJECT (a)))
1405 {
1406 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407 return res;
1408 }
1409 DDR_AFFINE_P (res) = true;
1410 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1411 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1412 DDR_LOOP_NEST (res) = loop_nest;
1413 DDR_INNER_LOOP (res) = 0;
1414 DDR_SELF_REFERENCE (res) = true;
1415 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1416 {
1417 struct subscript *subscript;
1418
1419 subscript = XNEW (struct subscript);
1420 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1421 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1422 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1423 SUB_DISTANCE (subscript) = chrec_dont_know;
1424 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1425 }
1426 return res;
1427 }
1428
1429 /* If the references do not access the same object, we do not know
1430 whether they alias or not. */
1431 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1432 {
1433 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1434 return res;
1435 }
1436
1437 /* If the base of the object is not invariant in the loop nest, we cannot
1438 analyze it. TODO -- in fact, it would suffice to record that there may
1439 be arbitrary dependences in the loops where the base object varies. */
1440 if (loop_nest
1441 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1442 DR_BASE_OBJECT (a)))
1443 {
1444 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1445 return res;
1446 }
1447
1448 /* If the number of dimensions of the access to not agree we can have
1449 a pointer access to a component of the array element type and an
1450 array access while the base-objects are still the same. Punt. */
1451 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1452 {
1453 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1454 return res;
1455 }
1456
1457 DDR_AFFINE_P (res) = true;
1458 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1459 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1460 DDR_LOOP_NEST (res) = loop_nest;
1461 DDR_INNER_LOOP (res) = 0;
1462 DDR_SELF_REFERENCE (res) = false;
1463
1464 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1465 {
1466 struct subscript *subscript;
1467
1468 subscript = XNEW (struct subscript);
1469 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1470 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1471 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1472 SUB_DISTANCE (subscript) = chrec_dont_know;
1473 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1474 }
1475
1476 return res;
1477 }
1478
1479 /* Frees memory used by the conflict function F. */
1480
1481 static void
1482 free_conflict_function (conflict_function *f)
1483 {
1484 unsigned i;
1485
1486 if (CF_NONTRIVIAL_P (f))
1487 {
1488 for (i = 0; i < f->n; i++)
1489 affine_fn_free (f->fns[i]);
1490 }
1491 free (f);
1492 }
1493
1494 /* Frees memory used by SUBSCRIPTS. */
1495
1496 static void
1497 free_subscripts (VEC (subscript_p, heap) *subscripts)
1498 {
1499 unsigned i;
1500 subscript_p s;
1501
1502 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1503 {
1504 free_conflict_function (s->conflicting_iterations_in_a);
1505 free_conflict_function (s->conflicting_iterations_in_b);
1506 free (s);
1507 }
1508 VEC_free (subscript_p, heap, subscripts);
1509 }
1510
1511 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1512 description. */
1513
1514 static inline void
1515 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1516 tree chrec)
1517 {
1518 if (dump_file && (dump_flags & TDF_DETAILS))
1519 {
1520 fprintf (dump_file, "(dependence classified: ");
1521 print_generic_expr (dump_file, chrec, 0);
1522 fprintf (dump_file, ")\n");
1523 }
1524
1525 DDR_ARE_DEPENDENT (ddr) = chrec;
1526 free_subscripts (DDR_SUBSCRIPTS (ddr));
1527 DDR_SUBSCRIPTS (ddr) = NULL;
1528 }
1529
1530 /* The dependence relation DDR cannot be represented by a distance
1531 vector. */
1532
1533 static inline void
1534 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1535 {
1536 if (dump_file && (dump_flags & TDF_DETAILS))
1537 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1538
1539 DDR_AFFINE_P (ddr) = false;
1540 }
1541
1542 \f
1543
1544 /* This section contains the classic Banerjee tests. */
1545
1546 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1547 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1548
1549 static inline bool
1550 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1551 {
1552 return (evolution_function_is_constant_p (chrec_a)
1553 && evolution_function_is_constant_p (chrec_b));
1554 }
1555
1556 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1557 variable, i.e., if the SIV (Single Index Variable) test is true. */
1558
1559 static bool
1560 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1561 {
1562 if ((evolution_function_is_constant_p (chrec_a)
1563 && evolution_function_is_univariate_p (chrec_b))
1564 || (evolution_function_is_constant_p (chrec_b)
1565 && evolution_function_is_univariate_p (chrec_a)))
1566 return true;
1567
1568 if (evolution_function_is_univariate_p (chrec_a)
1569 && evolution_function_is_univariate_p (chrec_b))
1570 {
1571 switch (TREE_CODE (chrec_a))
1572 {
1573 case POLYNOMIAL_CHREC:
1574 switch (TREE_CODE (chrec_b))
1575 {
1576 case POLYNOMIAL_CHREC:
1577 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1578 return false;
1579
1580 default:
1581 return true;
1582 }
1583
1584 default:
1585 return true;
1586 }
1587 }
1588
1589 return false;
1590 }
1591
1592 /* Creates a conflict function with N dimensions. The affine functions
1593 in each dimension follow. */
1594
1595 static conflict_function *
1596 conflict_fn (unsigned n, ...)
1597 {
1598 unsigned i;
1599 conflict_function *ret = XCNEW (conflict_function);
1600 va_list ap;
1601
1602 gcc_assert (0 < n && n <= MAX_DIM);
1603 va_start(ap, n);
1604
1605 ret->n = n;
1606 for (i = 0; i < n; i++)
1607 ret->fns[i] = va_arg (ap, affine_fn);
1608 va_end(ap);
1609
1610 return ret;
1611 }
1612
1613 /* Returns constant affine function with value CST. */
1614
1615 static affine_fn
1616 affine_fn_cst (tree cst)
1617 {
1618 affine_fn fn = VEC_alloc (tree, heap, 1);
1619 VEC_quick_push (tree, fn, cst);
1620 return fn;
1621 }
1622
1623 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1624
1625 static affine_fn
1626 affine_fn_univar (tree cst, unsigned dim, tree coef)
1627 {
1628 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1629 unsigned i;
1630
1631 gcc_assert (dim > 0);
1632 VEC_quick_push (tree, fn, cst);
1633 for (i = 1; i < dim; i++)
1634 VEC_quick_push (tree, fn, integer_zero_node);
1635 VEC_quick_push (tree, fn, coef);
1636 return fn;
1637 }
1638
1639 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1640 *OVERLAPS_B are initialized to the functions that describe the
1641 relation between the elements accessed twice by CHREC_A and
1642 CHREC_B. For k >= 0, the following property is verified:
1643
1644 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1645
1646 static void
1647 analyze_ziv_subscript (tree chrec_a,
1648 tree chrec_b,
1649 conflict_function **overlaps_a,
1650 conflict_function **overlaps_b,
1651 tree *last_conflicts)
1652 {
1653 tree type, difference;
1654 dependence_stats.num_ziv++;
1655
1656 if (dump_file && (dump_flags & TDF_DETAILS))
1657 fprintf (dump_file, "(analyze_ziv_subscript \n");
1658
1659 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1660 chrec_a = chrec_convert (type, chrec_a, NULL);
1661 chrec_b = chrec_convert (type, chrec_b, NULL);
1662 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1663
1664 switch (TREE_CODE (difference))
1665 {
1666 case INTEGER_CST:
1667 if (integer_zerop (difference))
1668 {
1669 /* The difference is equal to zero: the accessed index
1670 overlaps for each iteration in the loop. */
1671 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1672 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1673 *last_conflicts = chrec_dont_know;
1674 dependence_stats.num_ziv_dependent++;
1675 }
1676 else
1677 {
1678 /* The accesses do not overlap. */
1679 *overlaps_a = conflict_fn_no_dependence ();
1680 *overlaps_b = conflict_fn_no_dependence ();
1681 *last_conflicts = integer_zero_node;
1682 dependence_stats.num_ziv_independent++;
1683 }
1684 break;
1685
1686 default:
1687 /* We're not sure whether the indexes overlap. For the moment,
1688 conservatively answer "don't know". */
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1690 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1691
1692 *overlaps_a = conflict_fn_not_known ();
1693 *overlaps_b = conflict_fn_not_known ();
1694 *last_conflicts = chrec_dont_know;
1695 dependence_stats.num_ziv_unimplemented++;
1696 break;
1697 }
1698
1699 if (dump_file && (dump_flags & TDF_DETAILS))
1700 fprintf (dump_file, ")\n");
1701 }
1702
1703 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1704 and only if it fits to the int type. If this is not the case, or the
1705 bound on the number of iterations of LOOP could not be derived, returns
1706 chrec_dont_know. */
1707
1708 static tree
1709 max_stmt_executions_tree (struct loop *loop)
1710 {
1711 double_int nit;
1712
1713 if (!max_stmt_executions (loop, &nit))
1714 return chrec_dont_know;
1715
1716 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1717 return chrec_dont_know;
1718
1719 return double_int_to_tree (unsigned_type_node, nit);
1720 }
1721
1722 /* Determine whether the CHREC is always positive/negative. If the expression
1723 cannot be statically analyzed, return false, otherwise set the answer into
1724 VALUE. */
1725
1726 static bool
1727 chrec_is_positive (tree chrec, bool *value)
1728 {
1729 bool value0, value1, value2;
1730 tree end_value, nb_iter;
1731
1732 switch (TREE_CODE (chrec))
1733 {
1734 case POLYNOMIAL_CHREC:
1735 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1736 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1737 return false;
1738
1739 /* FIXME -- overflows. */
1740 if (value0 == value1)
1741 {
1742 *value = value0;
1743 return true;
1744 }
1745
1746 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1747 and the proof consists in showing that the sign never
1748 changes during the execution of the loop, from 0 to
1749 loop->nb_iterations. */
1750 if (!evolution_function_is_affine_p (chrec))
1751 return false;
1752
1753 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1754 if (chrec_contains_undetermined (nb_iter))
1755 return false;
1756
1757 #if 0
1758 /* TODO -- If the test is after the exit, we may decrease the number of
1759 iterations by one. */
1760 if (after_exit)
1761 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1762 #endif
1763
1764 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1765
1766 if (!chrec_is_positive (end_value, &value2))
1767 return false;
1768
1769 *value = value0;
1770 return value0 == value1;
1771
1772 case INTEGER_CST:
1773 switch (tree_int_cst_sgn (chrec))
1774 {
1775 case -1:
1776 *value = false;
1777 break;
1778 case 1:
1779 *value = true;
1780 break;
1781 default:
1782 return false;
1783 }
1784 return true;
1785
1786 default:
1787 return false;
1788 }
1789 }
1790
1791
1792 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1793 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1794 *OVERLAPS_B are initialized to the functions that describe the
1795 relation between the elements accessed twice by CHREC_A and
1796 CHREC_B. For k >= 0, the following property is verified:
1797
1798 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1799
1800 static void
1801 analyze_siv_subscript_cst_affine (tree chrec_a,
1802 tree chrec_b,
1803 conflict_function **overlaps_a,
1804 conflict_function **overlaps_b,
1805 tree *last_conflicts)
1806 {
1807 bool value0, value1, value2;
1808 tree type, difference, tmp;
1809
1810 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1811 chrec_a = chrec_convert (type, chrec_a, NULL);
1812 chrec_b = chrec_convert (type, chrec_b, NULL);
1813 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1814
1815 /* Special case overlap in the first iteration. */
1816 if (integer_zerop (difference))
1817 {
1818 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1819 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1820 *last_conflicts = integer_one_node;
1821 return;
1822 }
1823
1824 if (!chrec_is_positive (initial_condition (difference), &value0))
1825 {
1826 if (dump_file && (dump_flags & TDF_DETAILS))
1827 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1828
1829 dependence_stats.num_siv_unimplemented++;
1830 *overlaps_a = conflict_fn_not_known ();
1831 *overlaps_b = conflict_fn_not_known ();
1832 *last_conflicts = chrec_dont_know;
1833 return;
1834 }
1835 else
1836 {
1837 if (value0 == false)
1838 {
1839 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1840 {
1841 if (dump_file && (dump_flags & TDF_DETAILS))
1842 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1843
1844 *overlaps_a = conflict_fn_not_known ();
1845 *overlaps_b = conflict_fn_not_known ();
1846 *last_conflicts = chrec_dont_know;
1847 dependence_stats.num_siv_unimplemented++;
1848 return;
1849 }
1850 else
1851 {
1852 if (value1 == true)
1853 {
1854 /* Example:
1855 chrec_a = 12
1856 chrec_b = {10, +, 1}
1857 */
1858
1859 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1860 {
1861 HOST_WIDE_INT numiter;
1862 struct loop *loop = get_chrec_loop (chrec_b);
1863
1864 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1865 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1866 fold_build1 (ABS_EXPR, type, difference),
1867 CHREC_RIGHT (chrec_b));
1868 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1869 *last_conflicts = integer_one_node;
1870
1871
1872 /* Perform weak-zero siv test to see if overlap is
1873 outside the loop bounds. */
1874 numiter = max_stmt_executions_int (loop);
1875
1876 if (numiter >= 0
1877 && compare_tree_int (tmp, numiter) > 0)
1878 {
1879 free_conflict_function (*overlaps_a);
1880 free_conflict_function (*overlaps_b);
1881 *overlaps_a = conflict_fn_no_dependence ();
1882 *overlaps_b = conflict_fn_no_dependence ();
1883 *last_conflicts = integer_zero_node;
1884 dependence_stats.num_siv_independent++;
1885 return;
1886 }
1887 dependence_stats.num_siv_dependent++;
1888 return;
1889 }
1890
1891 /* When the step does not divide the difference, there are
1892 no overlaps. */
1893 else
1894 {
1895 *overlaps_a = conflict_fn_no_dependence ();
1896 *overlaps_b = conflict_fn_no_dependence ();
1897 *last_conflicts = integer_zero_node;
1898 dependence_stats.num_siv_independent++;
1899 return;
1900 }
1901 }
1902
1903 else
1904 {
1905 /* Example:
1906 chrec_a = 12
1907 chrec_b = {10, +, -1}
1908
1909 In this case, chrec_a will not overlap with chrec_b. */
1910 *overlaps_a = conflict_fn_no_dependence ();
1911 *overlaps_b = conflict_fn_no_dependence ();
1912 *last_conflicts = integer_zero_node;
1913 dependence_stats.num_siv_independent++;
1914 return;
1915 }
1916 }
1917 }
1918 else
1919 {
1920 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1921 {
1922 if (dump_file && (dump_flags & TDF_DETAILS))
1923 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1924
1925 *overlaps_a = conflict_fn_not_known ();
1926 *overlaps_b = conflict_fn_not_known ();
1927 *last_conflicts = chrec_dont_know;
1928 dependence_stats.num_siv_unimplemented++;
1929 return;
1930 }
1931 else
1932 {
1933 if (value2 == false)
1934 {
1935 /* Example:
1936 chrec_a = 3
1937 chrec_b = {10, +, -1}
1938 */
1939 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1940 {
1941 HOST_WIDE_INT numiter;
1942 struct loop *loop = get_chrec_loop (chrec_b);
1943
1944 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1945 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1946 CHREC_RIGHT (chrec_b));
1947 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1948 *last_conflicts = integer_one_node;
1949
1950 /* Perform weak-zero siv test to see if overlap is
1951 outside the loop bounds. */
1952 numiter = max_stmt_executions_int (loop);
1953
1954 if (numiter >= 0
1955 && compare_tree_int (tmp, numiter) > 0)
1956 {
1957 free_conflict_function (*overlaps_a);
1958 free_conflict_function (*overlaps_b);
1959 *overlaps_a = conflict_fn_no_dependence ();
1960 *overlaps_b = conflict_fn_no_dependence ();
1961 *last_conflicts = integer_zero_node;
1962 dependence_stats.num_siv_independent++;
1963 return;
1964 }
1965 dependence_stats.num_siv_dependent++;
1966 return;
1967 }
1968
1969 /* When the step does not divide the difference, there
1970 are no overlaps. */
1971 else
1972 {
1973 *overlaps_a = conflict_fn_no_dependence ();
1974 *overlaps_b = conflict_fn_no_dependence ();
1975 *last_conflicts = integer_zero_node;
1976 dependence_stats.num_siv_independent++;
1977 return;
1978 }
1979 }
1980 else
1981 {
1982 /* Example:
1983 chrec_a = 3
1984 chrec_b = {4, +, 1}
1985
1986 In this case, chrec_a will not overlap with chrec_b. */
1987 *overlaps_a = conflict_fn_no_dependence ();
1988 *overlaps_b = conflict_fn_no_dependence ();
1989 *last_conflicts = integer_zero_node;
1990 dependence_stats.num_siv_independent++;
1991 return;
1992 }
1993 }
1994 }
1995 }
1996 }
1997
1998 /* Helper recursive function for initializing the matrix A. Returns
1999 the initial value of CHREC. */
2000
2001 static tree
2002 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2003 {
2004 gcc_assert (chrec);
2005
2006 switch (TREE_CODE (chrec))
2007 {
2008 case POLYNOMIAL_CHREC:
2009 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2010
2011 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2012 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2013
2014 case PLUS_EXPR:
2015 case MULT_EXPR:
2016 case MINUS_EXPR:
2017 {
2018 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2019 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2020
2021 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2022 }
2023
2024 case NOP_EXPR:
2025 {
2026 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2027 return chrec_convert (chrec_type (chrec), op, NULL);
2028 }
2029
2030 case BIT_NOT_EXPR:
2031 {
2032 /* Handle ~X as -1 - X. */
2033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2034 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2035 build_int_cst (TREE_TYPE (chrec), -1), op);
2036 }
2037
2038 case INTEGER_CST:
2039 return chrec;
2040
2041 default:
2042 gcc_unreachable ();
2043 return NULL_TREE;
2044 }
2045 }
2046
2047 #define FLOOR_DIV(x,y) ((x) / (y))
2048
2049 /* Solves the special case of the Diophantine equation:
2050 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2051
2052 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2053 number of iterations that loops X and Y run. The overlaps will be
2054 constructed as evolutions in dimension DIM. */
2055
2056 static void
2057 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2058 affine_fn *overlaps_a,
2059 affine_fn *overlaps_b,
2060 tree *last_conflicts, int dim)
2061 {
2062 if (((step_a > 0 && step_b > 0)
2063 || (step_a < 0 && step_b < 0)))
2064 {
2065 int step_overlaps_a, step_overlaps_b;
2066 int gcd_steps_a_b, last_conflict, tau2;
2067
2068 gcd_steps_a_b = gcd (step_a, step_b);
2069 step_overlaps_a = step_b / gcd_steps_a_b;
2070 step_overlaps_b = step_a / gcd_steps_a_b;
2071
2072 if (niter > 0)
2073 {
2074 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2075 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2076 last_conflict = tau2;
2077 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2078 }
2079 else
2080 *last_conflicts = chrec_dont_know;
2081
2082 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2083 build_int_cst (NULL_TREE,
2084 step_overlaps_a));
2085 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2086 build_int_cst (NULL_TREE,
2087 step_overlaps_b));
2088 }
2089
2090 else
2091 {
2092 *overlaps_a = affine_fn_cst (integer_zero_node);
2093 *overlaps_b = affine_fn_cst (integer_zero_node);
2094 *last_conflicts = integer_zero_node;
2095 }
2096 }
2097
2098 /* Solves the special case of a Diophantine equation where CHREC_A is
2099 an affine bivariate function, and CHREC_B is an affine univariate
2100 function. For example,
2101
2102 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2103
2104 has the following overlapping functions:
2105
2106 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2107 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2108 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2109
2110 FORNOW: This is a specialized implementation for a case occurring in
2111 a common benchmark. Implement the general algorithm. */
2112
2113 static void
2114 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2115 conflict_function **overlaps_a,
2116 conflict_function **overlaps_b,
2117 tree *last_conflicts)
2118 {
2119 bool xz_p, yz_p, xyz_p;
2120 int step_x, step_y, step_z;
2121 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2122 affine_fn overlaps_a_xz, overlaps_b_xz;
2123 affine_fn overlaps_a_yz, overlaps_b_yz;
2124 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2125 affine_fn ova1, ova2, ovb;
2126 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2127
2128 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2129 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2130 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2131
2132 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2133 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2134 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2135
2136 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2137 {
2138 if (dump_file && (dump_flags & TDF_DETAILS))
2139 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2140
2141 *overlaps_a = conflict_fn_not_known ();
2142 *overlaps_b = conflict_fn_not_known ();
2143 *last_conflicts = chrec_dont_know;
2144 return;
2145 }
2146
2147 niter = MIN (niter_x, niter_z);
2148 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2149 &overlaps_a_xz,
2150 &overlaps_b_xz,
2151 &last_conflicts_xz, 1);
2152 niter = MIN (niter_y, niter_z);
2153 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2154 &overlaps_a_yz,
2155 &overlaps_b_yz,
2156 &last_conflicts_yz, 2);
2157 niter = MIN (niter_x, niter_z);
2158 niter = MIN (niter_y, niter);
2159 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2160 &overlaps_a_xyz,
2161 &overlaps_b_xyz,
2162 &last_conflicts_xyz, 3);
2163
2164 xz_p = !integer_zerop (last_conflicts_xz);
2165 yz_p = !integer_zerop (last_conflicts_yz);
2166 xyz_p = !integer_zerop (last_conflicts_xyz);
2167
2168 if (xz_p || yz_p || xyz_p)
2169 {
2170 ova1 = affine_fn_cst (integer_zero_node);
2171 ova2 = affine_fn_cst (integer_zero_node);
2172 ovb = affine_fn_cst (integer_zero_node);
2173 if (xz_p)
2174 {
2175 affine_fn t0 = ova1;
2176 affine_fn t2 = ovb;
2177
2178 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2179 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2180 affine_fn_free (t0);
2181 affine_fn_free (t2);
2182 *last_conflicts = last_conflicts_xz;
2183 }
2184 if (yz_p)
2185 {
2186 affine_fn t0 = ova2;
2187 affine_fn t2 = ovb;
2188
2189 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2190 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2191 affine_fn_free (t0);
2192 affine_fn_free (t2);
2193 *last_conflicts = last_conflicts_yz;
2194 }
2195 if (xyz_p)
2196 {
2197 affine_fn t0 = ova1;
2198 affine_fn t2 = ova2;
2199 affine_fn t4 = ovb;
2200
2201 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2202 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2203 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2204 affine_fn_free (t0);
2205 affine_fn_free (t2);
2206 affine_fn_free (t4);
2207 *last_conflicts = last_conflicts_xyz;
2208 }
2209 *overlaps_a = conflict_fn (2, ova1, ova2);
2210 *overlaps_b = conflict_fn (1, ovb);
2211 }
2212 else
2213 {
2214 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2215 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2216 *last_conflicts = integer_zero_node;
2217 }
2218
2219 affine_fn_free (overlaps_a_xz);
2220 affine_fn_free (overlaps_b_xz);
2221 affine_fn_free (overlaps_a_yz);
2222 affine_fn_free (overlaps_b_yz);
2223 affine_fn_free (overlaps_a_xyz);
2224 affine_fn_free (overlaps_b_xyz);
2225 }
2226
2227 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2228
2229 static void
2230 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2231 int size)
2232 {
2233 memcpy (vec2, vec1, size * sizeof (*vec1));
2234 }
2235
2236 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2237
2238 static void
2239 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2240 int m, int n)
2241 {
2242 int i;
2243
2244 for (i = 0; i < m; i++)
2245 lambda_vector_copy (mat1[i], mat2[i], n);
2246 }
2247
2248 /* Store the N x N identity matrix in MAT. */
2249
2250 static void
2251 lambda_matrix_id (lambda_matrix mat, int size)
2252 {
2253 int i, j;
2254
2255 for (i = 0; i < size; i++)
2256 for (j = 0; j < size; j++)
2257 mat[i][j] = (i == j) ? 1 : 0;
2258 }
2259
2260 /* Return the first nonzero element of vector VEC1 between START and N.
2261 We must have START <= N. Returns N if VEC1 is the zero vector. */
2262
2263 static int
2264 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2265 {
2266 int j = start;
2267 while (j < n && vec1[j] == 0)
2268 j++;
2269 return j;
2270 }
2271
2272 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2273 R2 = R2 + CONST1 * R1. */
2274
2275 static void
2276 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2277 {
2278 int i;
2279
2280 if (const1 == 0)
2281 return;
2282
2283 for (i = 0; i < n; i++)
2284 mat[r2][i] += const1 * mat[r1][i];
2285 }
2286
2287 /* Swap rows R1 and R2 in matrix MAT. */
2288
2289 static void
2290 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2291 {
2292 lambda_vector row;
2293
2294 row = mat[r1];
2295 mat[r1] = mat[r2];
2296 mat[r2] = row;
2297 }
2298
2299 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2300 and store the result in VEC2. */
2301
2302 static void
2303 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2304 int size, int const1)
2305 {
2306 int i;
2307
2308 if (const1 == 0)
2309 lambda_vector_clear (vec2, size);
2310 else
2311 for (i = 0; i < size; i++)
2312 vec2[i] = const1 * vec1[i];
2313 }
2314
2315 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2316
2317 static void
2318 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2319 int size)
2320 {
2321 lambda_vector_mult_const (vec1, vec2, size, -1);
2322 }
2323
2324 /* Negate row R1 of matrix MAT which has N columns. */
2325
2326 static void
2327 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2328 {
2329 lambda_vector_negate (mat[r1], mat[r1], n);
2330 }
2331
2332 /* Return true if two vectors are equal. */
2333
2334 static bool
2335 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2336 {
2337 int i;
2338 for (i = 0; i < size; i++)
2339 if (vec1[i] != vec2[i])
2340 return false;
2341 return true;
2342 }
2343
2344 /* Given an M x N integer matrix A, this function determines an M x
2345 M unimodular matrix U, and an M x N echelon matrix S such that
2346 "U.A = S". This decomposition is also known as "right Hermite".
2347
2348 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2349 Restructuring Compilers" Utpal Banerjee. */
2350
2351 static void
2352 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2353 lambda_matrix S, lambda_matrix U)
2354 {
2355 int i, j, i0 = 0;
2356
2357 lambda_matrix_copy (A, S, m, n);
2358 lambda_matrix_id (U, m);
2359
2360 for (j = 0; j < n; j++)
2361 {
2362 if (lambda_vector_first_nz (S[j], m, i0) < m)
2363 {
2364 ++i0;
2365 for (i = m - 1; i >= i0; i--)
2366 {
2367 while (S[i][j] != 0)
2368 {
2369 int sigma, factor, a, b;
2370
2371 a = S[i-1][j];
2372 b = S[i][j];
2373 sigma = (a * b < 0) ? -1: 1;
2374 a = abs (a);
2375 b = abs (b);
2376 factor = sigma * (a / b);
2377
2378 lambda_matrix_row_add (S, n, i, i-1, -factor);
2379 lambda_matrix_row_exchange (S, i, i-1);
2380
2381 lambda_matrix_row_add (U, m, i, i-1, -factor);
2382 lambda_matrix_row_exchange (U, i, i-1);
2383 }
2384 }
2385 }
2386 }
2387 }
2388
2389 /* Determines the overlapping elements due to accesses CHREC_A and
2390 CHREC_B, that are affine functions. This function cannot handle
2391 symbolic evolution functions, ie. when initial conditions are
2392 parameters, because it uses lambda matrices of integers. */
2393
2394 static void
2395 analyze_subscript_affine_affine (tree chrec_a,
2396 tree chrec_b,
2397 conflict_function **overlaps_a,
2398 conflict_function **overlaps_b,
2399 tree *last_conflicts)
2400 {
2401 unsigned nb_vars_a, nb_vars_b, dim;
2402 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2403 lambda_matrix A, U, S;
2404 struct obstack scratch_obstack;
2405
2406 if (eq_evolutions_p (chrec_a, chrec_b))
2407 {
2408 /* The accessed index overlaps for each iteration in the
2409 loop. */
2410 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2411 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2412 *last_conflicts = chrec_dont_know;
2413 return;
2414 }
2415 if (dump_file && (dump_flags & TDF_DETAILS))
2416 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2417
2418 /* For determining the initial intersection, we have to solve a
2419 Diophantine equation. This is the most time consuming part.
2420
2421 For answering to the question: "Is there a dependence?" we have
2422 to prove that there exists a solution to the Diophantine
2423 equation, and that the solution is in the iteration domain,
2424 i.e. the solution is positive or zero, and that the solution
2425 happens before the upper bound loop.nb_iterations. Otherwise
2426 there is no dependence. This function outputs a description of
2427 the iterations that hold the intersections. */
2428
2429 nb_vars_a = nb_vars_in_chrec (chrec_a);
2430 nb_vars_b = nb_vars_in_chrec (chrec_b);
2431
2432 gcc_obstack_init (&scratch_obstack);
2433
2434 dim = nb_vars_a + nb_vars_b;
2435 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2436 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2437 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2438
2439 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2440 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2441 gamma = init_b - init_a;
2442
2443 /* Don't do all the hard work of solving the Diophantine equation
2444 when we already know the solution: for example,
2445 | {3, +, 1}_1
2446 | {3, +, 4}_2
2447 | gamma = 3 - 3 = 0.
2448 Then the first overlap occurs during the first iterations:
2449 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2450 */
2451 if (gamma == 0)
2452 {
2453 if (nb_vars_a == 1 && nb_vars_b == 1)
2454 {
2455 HOST_WIDE_INT step_a, step_b;
2456 HOST_WIDE_INT niter, niter_a, niter_b;
2457 affine_fn ova, ovb;
2458
2459 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2460 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2461 niter = MIN (niter_a, niter_b);
2462 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2463 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2464
2465 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2466 &ova, &ovb,
2467 last_conflicts, 1);
2468 *overlaps_a = conflict_fn (1, ova);
2469 *overlaps_b = conflict_fn (1, ovb);
2470 }
2471
2472 else if (nb_vars_a == 2 && nb_vars_b == 1)
2473 compute_overlap_steps_for_affine_1_2
2474 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2475
2476 else if (nb_vars_a == 1 && nb_vars_b == 2)
2477 compute_overlap_steps_for_affine_1_2
2478 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2479
2480 else
2481 {
2482 if (dump_file && (dump_flags & TDF_DETAILS))
2483 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2484 *overlaps_a = conflict_fn_not_known ();
2485 *overlaps_b = conflict_fn_not_known ();
2486 *last_conflicts = chrec_dont_know;
2487 }
2488 goto end_analyze_subs_aa;
2489 }
2490
2491 /* U.A = S */
2492 lambda_matrix_right_hermite (A, dim, 1, S, U);
2493
2494 if (S[0][0] < 0)
2495 {
2496 S[0][0] *= -1;
2497 lambda_matrix_row_negate (U, dim, 0);
2498 }
2499 gcd_alpha_beta = S[0][0];
2500
2501 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2502 but that is a quite strange case. Instead of ICEing, answer
2503 don't know. */
2504 if (gcd_alpha_beta == 0)
2505 {
2506 *overlaps_a = conflict_fn_not_known ();
2507 *overlaps_b = conflict_fn_not_known ();
2508 *last_conflicts = chrec_dont_know;
2509 goto end_analyze_subs_aa;
2510 }
2511
2512 /* The classic "gcd-test". */
2513 if (!int_divides_p (gcd_alpha_beta, gamma))
2514 {
2515 /* The "gcd-test" has determined that there is no integer
2516 solution, i.e. there is no dependence. */
2517 *overlaps_a = conflict_fn_no_dependence ();
2518 *overlaps_b = conflict_fn_no_dependence ();
2519 *last_conflicts = integer_zero_node;
2520 }
2521
2522 /* Both access functions are univariate. This includes SIV and MIV cases. */
2523 else if (nb_vars_a == 1 && nb_vars_b == 1)
2524 {
2525 /* Both functions should have the same evolution sign. */
2526 if (((A[0][0] > 0 && -A[1][0] > 0)
2527 || (A[0][0] < 0 && -A[1][0] < 0)))
2528 {
2529 /* The solutions are given by:
2530 |
2531 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2532 | [u21 u22] [y0]
2533
2534 For a given integer t. Using the following variables,
2535
2536 | i0 = u11 * gamma / gcd_alpha_beta
2537 | j0 = u12 * gamma / gcd_alpha_beta
2538 | i1 = u21
2539 | j1 = u22
2540
2541 the solutions are:
2542
2543 | x0 = i0 + i1 * t,
2544 | y0 = j0 + j1 * t. */
2545 HOST_WIDE_INT i0, j0, i1, j1;
2546
2547 i0 = U[0][0] * gamma / gcd_alpha_beta;
2548 j0 = U[0][1] * gamma / gcd_alpha_beta;
2549 i1 = U[1][0];
2550 j1 = U[1][1];
2551
2552 if ((i1 == 0 && i0 < 0)
2553 || (j1 == 0 && j0 < 0))
2554 {
2555 /* There is no solution.
2556 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2557 falls in here, but for the moment we don't look at the
2558 upper bound of the iteration domain. */
2559 *overlaps_a = conflict_fn_no_dependence ();
2560 *overlaps_b = conflict_fn_no_dependence ();
2561 *last_conflicts = integer_zero_node;
2562 goto end_analyze_subs_aa;
2563 }
2564
2565 if (i1 > 0 && j1 > 0)
2566 {
2567 HOST_WIDE_INT niter_a
2568 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2569 HOST_WIDE_INT niter_b
2570 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2571 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2572
2573 /* (X0, Y0) is a solution of the Diophantine equation:
2574 "chrec_a (X0) = chrec_b (Y0)". */
2575 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2576 CEIL (-j0, j1));
2577 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2578 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2579
2580 /* (X1, Y1) is the smallest positive solution of the eq
2581 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2582 first conflict occurs. */
2583 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2584 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2585 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2586
2587 if (niter > 0)
2588 {
2589 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2590 FLOOR_DIV (niter - j0, j1));
2591 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2592
2593 /* If the overlap occurs outside of the bounds of the
2594 loop, there is no dependence. */
2595 if (x1 >= niter || y1 >= niter)
2596 {
2597 *overlaps_a = conflict_fn_no_dependence ();
2598 *overlaps_b = conflict_fn_no_dependence ();
2599 *last_conflicts = integer_zero_node;
2600 goto end_analyze_subs_aa;
2601 }
2602 else
2603 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2604 }
2605 else
2606 *last_conflicts = chrec_dont_know;
2607
2608 *overlaps_a
2609 = conflict_fn (1,
2610 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2611 1,
2612 build_int_cst (NULL_TREE, i1)));
2613 *overlaps_b
2614 = conflict_fn (1,
2615 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2616 1,
2617 build_int_cst (NULL_TREE, j1)));
2618 }
2619 else
2620 {
2621 /* FIXME: For the moment, the upper bound of the
2622 iteration domain for i and j is not checked. */
2623 if (dump_file && (dump_flags & TDF_DETAILS))
2624 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2625 *overlaps_a = conflict_fn_not_known ();
2626 *overlaps_b = conflict_fn_not_known ();
2627 *last_conflicts = chrec_dont_know;
2628 }
2629 }
2630 else
2631 {
2632 if (dump_file && (dump_flags & TDF_DETAILS))
2633 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2634 *overlaps_a = conflict_fn_not_known ();
2635 *overlaps_b = conflict_fn_not_known ();
2636 *last_conflicts = chrec_dont_know;
2637 }
2638 }
2639 else
2640 {
2641 if (dump_file && (dump_flags & TDF_DETAILS))
2642 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2643 *overlaps_a = conflict_fn_not_known ();
2644 *overlaps_b = conflict_fn_not_known ();
2645 *last_conflicts = chrec_dont_know;
2646 }
2647
2648 end_analyze_subs_aa:
2649 obstack_free (&scratch_obstack, NULL);
2650 if (dump_file && (dump_flags & TDF_DETAILS))
2651 {
2652 fprintf (dump_file, " (overlaps_a = ");
2653 dump_conflict_function (dump_file, *overlaps_a);
2654 fprintf (dump_file, ")\n (overlaps_b = ");
2655 dump_conflict_function (dump_file, *overlaps_b);
2656 fprintf (dump_file, ")\n");
2657 fprintf (dump_file, ")\n");
2658 }
2659 }
2660
2661 /* Returns true when analyze_subscript_affine_affine can be used for
2662 determining the dependence relation between chrec_a and chrec_b,
2663 that contain symbols. This function modifies chrec_a and chrec_b
2664 such that the analysis result is the same, and such that they don't
2665 contain symbols, and then can safely be passed to the analyzer.
2666
2667 Example: The analysis of the following tuples of evolutions produce
2668 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2669 vs. {0, +, 1}_1
2670
2671 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2672 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2673 */
2674
2675 static bool
2676 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2677 {
2678 tree diff, type, left_a, left_b, right_b;
2679
2680 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2681 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2682 /* FIXME: For the moment not handled. Might be refined later. */
2683 return false;
2684
2685 type = chrec_type (*chrec_a);
2686 left_a = CHREC_LEFT (*chrec_a);
2687 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2688 diff = chrec_fold_minus (type, left_a, left_b);
2689
2690 if (!evolution_function_is_constant_p (diff))
2691 return false;
2692
2693 if (dump_file && (dump_flags & TDF_DETAILS))
2694 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2695
2696 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2697 diff, CHREC_RIGHT (*chrec_a));
2698 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2699 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2700 build_int_cst (type, 0),
2701 right_b);
2702 return true;
2703 }
2704
2705 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2706 *OVERLAPS_B are initialized to the functions that describe the
2707 relation between the elements accessed twice by CHREC_A and
2708 CHREC_B. For k >= 0, the following property is verified:
2709
2710 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2711
2712 static void
2713 analyze_siv_subscript (tree chrec_a,
2714 tree chrec_b,
2715 conflict_function **overlaps_a,
2716 conflict_function **overlaps_b,
2717 tree *last_conflicts,
2718 int loop_nest_num)
2719 {
2720 dependence_stats.num_siv++;
2721
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 fprintf (dump_file, "(analyze_siv_subscript \n");
2724
2725 if (evolution_function_is_constant_p (chrec_a)
2726 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2727 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2728 overlaps_a, overlaps_b, last_conflicts);
2729
2730 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2731 && evolution_function_is_constant_p (chrec_b))
2732 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2733 overlaps_b, overlaps_a, last_conflicts);
2734
2735 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2736 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2737 {
2738 if (!chrec_contains_symbols (chrec_a)
2739 && !chrec_contains_symbols (chrec_b))
2740 {
2741 analyze_subscript_affine_affine (chrec_a, chrec_b,
2742 overlaps_a, overlaps_b,
2743 last_conflicts);
2744
2745 if (CF_NOT_KNOWN_P (*overlaps_a)
2746 || CF_NOT_KNOWN_P (*overlaps_b))
2747 dependence_stats.num_siv_unimplemented++;
2748 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2749 || CF_NO_DEPENDENCE_P (*overlaps_b))
2750 dependence_stats.num_siv_independent++;
2751 else
2752 dependence_stats.num_siv_dependent++;
2753 }
2754 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2755 &chrec_b))
2756 {
2757 analyze_subscript_affine_affine (chrec_a, chrec_b,
2758 overlaps_a, overlaps_b,
2759 last_conflicts);
2760
2761 if (CF_NOT_KNOWN_P (*overlaps_a)
2762 || CF_NOT_KNOWN_P (*overlaps_b))
2763 dependence_stats.num_siv_unimplemented++;
2764 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2765 || CF_NO_DEPENDENCE_P (*overlaps_b))
2766 dependence_stats.num_siv_independent++;
2767 else
2768 dependence_stats.num_siv_dependent++;
2769 }
2770 else
2771 goto siv_subscript_dontknow;
2772 }
2773
2774 else
2775 {
2776 siv_subscript_dontknow:;
2777 if (dump_file && (dump_flags & TDF_DETAILS))
2778 fprintf (dump_file, "siv test failed: unimplemented.\n");
2779 *overlaps_a = conflict_fn_not_known ();
2780 *overlaps_b = conflict_fn_not_known ();
2781 *last_conflicts = chrec_dont_know;
2782 dependence_stats.num_siv_unimplemented++;
2783 }
2784
2785 if (dump_file && (dump_flags & TDF_DETAILS))
2786 fprintf (dump_file, ")\n");
2787 }
2788
2789 /* Returns false if we can prove that the greatest common divisor of the steps
2790 of CHREC does not divide CST, false otherwise. */
2791
2792 static bool
2793 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2794 {
2795 HOST_WIDE_INT cd = 0, val;
2796 tree step;
2797
2798 if (!host_integerp (cst, 0))
2799 return true;
2800 val = tree_low_cst (cst, 0);
2801
2802 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2803 {
2804 step = CHREC_RIGHT (chrec);
2805 if (!host_integerp (step, 0))
2806 return true;
2807 cd = gcd (cd, tree_low_cst (step, 0));
2808 chrec = CHREC_LEFT (chrec);
2809 }
2810
2811 return val % cd == 0;
2812 }
2813
2814 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2815 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2816 functions that describe the relation between the elements accessed
2817 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2818 is verified:
2819
2820 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2821
2822 static void
2823 analyze_miv_subscript (tree chrec_a,
2824 tree chrec_b,
2825 conflict_function **overlaps_a,
2826 conflict_function **overlaps_b,
2827 tree *last_conflicts,
2828 struct loop *loop_nest)
2829 {
2830 tree type, difference;
2831
2832 dependence_stats.num_miv++;
2833 if (dump_file && (dump_flags & TDF_DETAILS))
2834 fprintf (dump_file, "(analyze_miv_subscript \n");
2835
2836 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2837 chrec_a = chrec_convert (type, chrec_a, NULL);
2838 chrec_b = chrec_convert (type, chrec_b, NULL);
2839 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2840
2841 if (eq_evolutions_p (chrec_a, chrec_b))
2842 {
2843 /* Access functions are the same: all the elements are accessed
2844 in the same order. */
2845 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2846 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2847 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2848 dependence_stats.num_miv_dependent++;
2849 }
2850
2851 else if (evolution_function_is_constant_p (difference)
2852 /* For the moment, the following is verified:
2853 evolution_function_is_affine_multivariate_p (chrec_a,
2854 loop_nest->num) */
2855 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2856 {
2857 /* testsuite/.../ssa-chrec-33.c
2858 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2859
2860 The difference is 1, and all the evolution steps are multiples
2861 of 2, consequently there are no overlapping elements. */
2862 *overlaps_a = conflict_fn_no_dependence ();
2863 *overlaps_b = conflict_fn_no_dependence ();
2864 *last_conflicts = integer_zero_node;
2865 dependence_stats.num_miv_independent++;
2866 }
2867
2868 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2869 && !chrec_contains_symbols (chrec_a)
2870 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2871 && !chrec_contains_symbols (chrec_b))
2872 {
2873 /* testsuite/.../ssa-chrec-35.c
2874 {0, +, 1}_2 vs. {0, +, 1}_3
2875 the overlapping elements are respectively located at iterations:
2876 {0, +, 1}_x and {0, +, 1}_x,
2877 in other words, we have the equality:
2878 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2879
2880 Other examples:
2881 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2882 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2883
2884 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2885 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2886 */
2887 analyze_subscript_affine_affine (chrec_a, chrec_b,
2888 overlaps_a, overlaps_b, last_conflicts);
2889
2890 if (CF_NOT_KNOWN_P (*overlaps_a)
2891 || CF_NOT_KNOWN_P (*overlaps_b))
2892 dependence_stats.num_miv_unimplemented++;
2893 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2894 || CF_NO_DEPENDENCE_P (*overlaps_b))
2895 dependence_stats.num_miv_independent++;
2896 else
2897 dependence_stats.num_miv_dependent++;
2898 }
2899
2900 else
2901 {
2902 /* When the analysis is too difficult, answer "don't know". */
2903 if (dump_file && (dump_flags & TDF_DETAILS))
2904 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2905
2906 *overlaps_a = conflict_fn_not_known ();
2907 *overlaps_b = conflict_fn_not_known ();
2908 *last_conflicts = chrec_dont_know;
2909 dependence_stats.num_miv_unimplemented++;
2910 }
2911
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, ")\n");
2914 }
2915
2916 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2917 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2918 OVERLAP_ITERATIONS_B are initialized with two functions that
2919 describe the iterations that contain conflicting elements.
2920
2921 Remark: For an integer k >= 0, the following equality is true:
2922
2923 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2924 */
2925
2926 static void
2927 analyze_overlapping_iterations (tree chrec_a,
2928 tree chrec_b,
2929 conflict_function **overlap_iterations_a,
2930 conflict_function **overlap_iterations_b,
2931 tree *last_conflicts, struct loop *loop_nest)
2932 {
2933 unsigned int lnn = loop_nest->num;
2934
2935 dependence_stats.num_subscript_tests++;
2936
2937 if (dump_file && (dump_flags & TDF_DETAILS))
2938 {
2939 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2940 fprintf (dump_file, " (chrec_a = ");
2941 print_generic_expr (dump_file, chrec_a, 0);
2942 fprintf (dump_file, ")\n (chrec_b = ");
2943 print_generic_expr (dump_file, chrec_b, 0);
2944 fprintf (dump_file, ")\n");
2945 }
2946
2947 if (chrec_a == NULL_TREE
2948 || chrec_b == NULL_TREE
2949 || chrec_contains_undetermined (chrec_a)
2950 || chrec_contains_undetermined (chrec_b))
2951 {
2952 dependence_stats.num_subscript_undetermined++;
2953
2954 *overlap_iterations_a = conflict_fn_not_known ();
2955 *overlap_iterations_b = conflict_fn_not_known ();
2956 }
2957
2958 /* If they are the same chrec, and are affine, they overlap
2959 on every iteration. */
2960 else if (eq_evolutions_p (chrec_a, chrec_b)
2961 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2962 || operand_equal_p (chrec_a, chrec_b, 0)))
2963 {
2964 dependence_stats.num_same_subscript_function++;
2965 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2966 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2967 *last_conflicts = chrec_dont_know;
2968 }
2969
2970 /* If they aren't the same, and aren't affine, we can't do anything
2971 yet. */
2972 else if ((chrec_contains_symbols (chrec_a)
2973 || chrec_contains_symbols (chrec_b))
2974 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2975 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2976 {
2977 dependence_stats.num_subscript_undetermined++;
2978 *overlap_iterations_a = conflict_fn_not_known ();
2979 *overlap_iterations_b = conflict_fn_not_known ();
2980 }
2981
2982 else if (ziv_subscript_p (chrec_a, chrec_b))
2983 analyze_ziv_subscript (chrec_a, chrec_b,
2984 overlap_iterations_a, overlap_iterations_b,
2985 last_conflicts);
2986
2987 else if (siv_subscript_p (chrec_a, chrec_b))
2988 analyze_siv_subscript (chrec_a, chrec_b,
2989 overlap_iterations_a, overlap_iterations_b,
2990 last_conflicts, lnn);
2991
2992 else
2993 analyze_miv_subscript (chrec_a, chrec_b,
2994 overlap_iterations_a, overlap_iterations_b,
2995 last_conflicts, loop_nest);
2996
2997 if (dump_file && (dump_flags & TDF_DETAILS))
2998 {
2999 fprintf (dump_file, " (overlap_iterations_a = ");
3000 dump_conflict_function (dump_file, *overlap_iterations_a);
3001 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3002 dump_conflict_function (dump_file, *overlap_iterations_b);
3003 fprintf (dump_file, ")\n");
3004 fprintf (dump_file, ")\n");
3005 }
3006 }
3007
3008 /* Helper function for uniquely inserting distance vectors. */
3009
3010 static void
3011 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3012 {
3013 unsigned i;
3014 lambda_vector v;
3015
3016 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
3017 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3018 return;
3019
3020 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
3021 }
3022
3023 /* Helper function for uniquely inserting direction vectors. */
3024
3025 static void
3026 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3027 {
3028 unsigned i;
3029 lambda_vector v;
3030
3031 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
3032 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3033 return;
3034
3035 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
3036 }
3037
3038 /* Add a distance of 1 on all the loops outer than INDEX. If we
3039 haven't yet determined a distance for this outer loop, push a new
3040 distance vector composed of the previous distance, and a distance
3041 of 1 for this outer loop. Example:
3042
3043 | loop_1
3044 | loop_2
3045 | A[10]
3046 | endloop_2
3047 | endloop_1
3048
3049 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3050 save (0, 1), then we have to save (1, 0). */
3051
3052 static void
3053 add_outer_distances (struct data_dependence_relation *ddr,
3054 lambda_vector dist_v, int index)
3055 {
3056 /* For each outer loop where init_v is not set, the accesses are
3057 in dependence of distance 1 in the loop. */
3058 while (--index >= 0)
3059 {
3060 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3061 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3062 save_v[index] = 1;
3063 save_dist_v (ddr, save_v);
3064 }
3065 }
3066
3067 /* Return false when fail to represent the data dependence as a
3068 distance vector. INIT_B is set to true when a component has been
3069 added to the distance vector DIST_V. INDEX_CARRY is then set to
3070 the index in DIST_V that carries the dependence. */
3071
3072 static bool
3073 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3074 struct data_reference *ddr_a,
3075 struct data_reference *ddr_b,
3076 lambda_vector dist_v, bool *init_b,
3077 int *index_carry)
3078 {
3079 unsigned i;
3080 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3081
3082 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3083 {
3084 tree access_fn_a, access_fn_b;
3085 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3086
3087 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3088 {
3089 non_affine_dependence_relation (ddr);
3090 return false;
3091 }
3092
3093 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3094 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3095
3096 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3097 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3098 {
3099 int dist, index;
3100 int var_a = CHREC_VARIABLE (access_fn_a);
3101 int var_b = CHREC_VARIABLE (access_fn_b);
3102
3103 if (var_a != var_b
3104 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3105 {
3106 non_affine_dependence_relation (ddr);
3107 return false;
3108 }
3109
3110 dist = int_cst_value (SUB_DISTANCE (subscript));
3111 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3112 *index_carry = MIN (index, *index_carry);
3113
3114 /* This is the subscript coupling test. If we have already
3115 recorded a distance for this loop (a distance coming from
3116 another subscript), it should be the same. For example,
3117 in the following code, there is no dependence:
3118
3119 | loop i = 0, N, 1
3120 | T[i+1][i] = ...
3121 | ... = T[i][i]
3122 | endloop
3123 */
3124 if (init_v[index] != 0 && dist_v[index] != dist)
3125 {
3126 finalize_ddr_dependent (ddr, chrec_known);
3127 return false;
3128 }
3129
3130 dist_v[index] = dist;
3131 init_v[index] = 1;
3132 *init_b = true;
3133 }
3134 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3135 {
3136 /* This can be for example an affine vs. constant dependence
3137 (T[i] vs. T[3]) that is not an affine dependence and is
3138 not representable as a distance vector. */
3139 non_affine_dependence_relation (ddr);
3140 return false;
3141 }
3142 }
3143
3144 return true;
3145 }
3146
3147 /* Return true when the DDR contains only constant access functions. */
3148
3149 static bool
3150 constant_access_functions (const struct data_dependence_relation *ddr)
3151 {
3152 unsigned i;
3153
3154 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3155 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3156 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3157 return false;
3158
3159 return true;
3160 }
3161
3162 /* Helper function for the case where DDR_A and DDR_B are the same
3163 multivariate access function with a constant step. For an example
3164 see pr34635-1.c. */
3165
3166 static void
3167 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3168 {
3169 int x_1, x_2;
3170 tree c_1 = CHREC_LEFT (c_2);
3171 tree c_0 = CHREC_LEFT (c_1);
3172 lambda_vector dist_v;
3173 int v1, v2, cd;
3174
3175 /* Polynomials with more than 2 variables are not handled yet. When
3176 the evolution steps are parameters, it is not possible to
3177 represent the dependence using classical distance vectors. */
3178 if (TREE_CODE (c_0) != INTEGER_CST
3179 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3180 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3181 {
3182 DDR_AFFINE_P (ddr) = false;
3183 return;
3184 }
3185
3186 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3187 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3188
3189 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3190 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3191 v1 = int_cst_value (CHREC_RIGHT (c_1));
3192 v2 = int_cst_value (CHREC_RIGHT (c_2));
3193 cd = gcd (v1, v2);
3194 v1 /= cd;
3195 v2 /= cd;
3196
3197 if (v2 < 0)
3198 {
3199 v2 = -v2;
3200 v1 = -v1;
3201 }
3202
3203 dist_v[x_1] = v2;
3204 dist_v[x_2] = -v1;
3205 save_dist_v (ddr, dist_v);
3206
3207 add_outer_distances (ddr, dist_v, x_1);
3208 }
3209
3210 /* Helper function for the case where DDR_A and DDR_B are the same
3211 access functions. */
3212
3213 static void
3214 add_other_self_distances (struct data_dependence_relation *ddr)
3215 {
3216 lambda_vector dist_v;
3217 unsigned i;
3218 int index_carry = DDR_NB_LOOPS (ddr);
3219
3220 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3221 {
3222 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3223
3224 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3225 {
3226 if (!evolution_function_is_univariate_p (access_fun))
3227 {
3228 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3229 {
3230 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3231 return;
3232 }
3233
3234 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3235
3236 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3237 add_multivariate_self_dist (ddr, access_fun);
3238 else
3239 /* The evolution step is not constant: it varies in
3240 the outer loop, so this cannot be represented by a
3241 distance vector. For example in pr34635.c the
3242 evolution is {0, +, {0, +, 4}_1}_2. */
3243 DDR_AFFINE_P (ddr) = false;
3244
3245 return;
3246 }
3247
3248 index_carry = MIN (index_carry,
3249 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3250 DDR_LOOP_NEST (ddr)));
3251 }
3252 }
3253
3254 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3255 add_outer_distances (ddr, dist_v, index_carry);
3256 }
3257
3258 static void
3259 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3260 {
3261 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3262
3263 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3264 save_dist_v (ddr, dist_v);
3265 }
3266
3267 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3268 is the case for example when access functions are the same and
3269 equal to a constant, as in:
3270
3271 | loop_1
3272 | A[3] = ...
3273 | ... = A[3]
3274 | endloop_1
3275
3276 in which case the distance vectors are (0) and (1). */
3277
3278 static void
3279 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3280 {
3281 unsigned i, j;
3282
3283 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3284 {
3285 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3286 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3287 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3288
3289 for (j = 0; j < ca->n; j++)
3290 if (affine_function_zero_p (ca->fns[j]))
3291 {
3292 insert_innermost_unit_dist_vector (ddr);
3293 return;
3294 }
3295
3296 for (j = 0; j < cb->n; j++)
3297 if (affine_function_zero_p (cb->fns[j]))
3298 {
3299 insert_innermost_unit_dist_vector (ddr);
3300 return;
3301 }
3302 }
3303 }
3304
3305 /* Compute the classic per loop distance vector. DDR is the data
3306 dependence relation to build a vector from. Return false when fail
3307 to represent the data dependence as a distance vector. */
3308
3309 static bool
3310 build_classic_dist_vector (struct data_dependence_relation *ddr,
3311 struct loop *loop_nest)
3312 {
3313 bool init_b = false;
3314 int index_carry = DDR_NB_LOOPS (ddr);
3315 lambda_vector dist_v;
3316
3317 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3318 return false;
3319
3320 if (same_access_functions (ddr))
3321 {
3322 /* Save the 0 vector. */
3323 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3324 save_dist_v (ddr, dist_v);
3325
3326 if (constant_access_functions (ddr))
3327 add_distance_for_zero_overlaps (ddr);
3328
3329 if (DDR_NB_LOOPS (ddr) > 1)
3330 add_other_self_distances (ddr);
3331
3332 return true;
3333 }
3334
3335 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3336 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3337 dist_v, &init_b, &index_carry))
3338 return false;
3339
3340 /* Save the distance vector if we initialized one. */
3341 if (init_b)
3342 {
3343 /* Verify a basic constraint: classic distance vectors should
3344 always be lexicographically positive.
3345
3346 Data references are collected in the order of execution of
3347 the program, thus for the following loop
3348
3349 | for (i = 1; i < 100; i++)
3350 | for (j = 1; j < 100; j++)
3351 | {
3352 | t = T[j+1][i-1]; // A
3353 | T[j][i] = t + 2; // B
3354 | }
3355
3356 references are collected following the direction of the wind:
3357 A then B. The data dependence tests are performed also
3358 following this order, such that we're looking at the distance
3359 separating the elements accessed by A from the elements later
3360 accessed by B. But in this example, the distance returned by
3361 test_dep (A, B) is lexicographically negative (-1, 1), that
3362 means that the access A occurs later than B with respect to
3363 the outer loop, ie. we're actually looking upwind. In this
3364 case we solve test_dep (B, A) looking downwind to the
3365 lexicographically positive solution, that returns the
3366 distance vector (1, -1). */
3367 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3368 {
3369 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3370 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3371 loop_nest))
3372 return false;
3373 compute_subscript_distance (ddr);
3374 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3375 save_v, &init_b, &index_carry))
3376 return false;
3377 save_dist_v (ddr, save_v);
3378 DDR_REVERSED_P (ddr) = true;
3379
3380 /* In this case there is a dependence forward for all the
3381 outer loops:
3382
3383 | for (k = 1; k < 100; k++)
3384 | for (i = 1; i < 100; i++)
3385 | for (j = 1; j < 100; j++)
3386 | {
3387 | t = T[j+1][i-1]; // A
3388 | T[j][i] = t + 2; // B
3389 | }
3390
3391 the vectors are:
3392 (0, 1, -1)
3393 (1, 1, -1)
3394 (1, -1, 1)
3395 */
3396 if (DDR_NB_LOOPS (ddr) > 1)
3397 {
3398 add_outer_distances (ddr, save_v, index_carry);
3399 add_outer_distances (ddr, dist_v, index_carry);
3400 }
3401 }
3402 else
3403 {
3404 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3405 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3406
3407 if (DDR_NB_LOOPS (ddr) > 1)
3408 {
3409 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3410
3411 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3412 DDR_A (ddr), loop_nest))
3413 return false;
3414 compute_subscript_distance (ddr);
3415 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3416 opposite_v, &init_b,
3417 &index_carry))
3418 return false;
3419
3420 save_dist_v (ddr, save_v);
3421 add_outer_distances (ddr, dist_v, index_carry);
3422 add_outer_distances (ddr, opposite_v, index_carry);
3423 }
3424 else
3425 save_dist_v (ddr, save_v);
3426 }
3427 }
3428 else
3429 {
3430 /* There is a distance of 1 on all the outer loops: Example:
3431 there is a dependence of distance 1 on loop_1 for the array A.
3432
3433 | loop_1
3434 | A[5] = ...
3435 | endloop
3436 */
3437 add_outer_distances (ddr, dist_v,
3438 lambda_vector_first_nz (dist_v,
3439 DDR_NB_LOOPS (ddr), 0));
3440 }
3441
3442 if (dump_file && (dump_flags & TDF_DETAILS))
3443 {
3444 unsigned i;
3445
3446 fprintf (dump_file, "(build_classic_dist_vector\n");
3447 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3448 {
3449 fprintf (dump_file, " dist_vector = (");
3450 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3451 DDR_NB_LOOPS (ddr));
3452 fprintf (dump_file, " )\n");
3453 }
3454 fprintf (dump_file, ")\n");
3455 }
3456
3457 return true;
3458 }
3459
3460 /* Return the direction for a given distance.
3461 FIXME: Computing dir this way is suboptimal, since dir can catch
3462 cases that dist is unable to represent. */
3463
3464 static inline enum data_dependence_direction
3465 dir_from_dist (int dist)
3466 {
3467 if (dist > 0)
3468 return dir_positive;
3469 else if (dist < 0)
3470 return dir_negative;
3471 else
3472 return dir_equal;
3473 }
3474
3475 /* Compute the classic per loop direction vector. DDR is the data
3476 dependence relation to build a vector from. */
3477
3478 static void
3479 build_classic_dir_vector (struct data_dependence_relation *ddr)
3480 {
3481 unsigned i, j;
3482 lambda_vector dist_v;
3483
3484 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3485 {
3486 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3487
3488 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3489 dir_v[j] = dir_from_dist (dist_v[j]);
3490
3491 save_dir_v (ddr, dir_v);
3492 }
3493 }
3494
3495 /* Helper function. Returns true when there is a dependence between
3496 data references DRA and DRB. */
3497
3498 static bool
3499 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3500 struct data_reference *dra,
3501 struct data_reference *drb,
3502 struct loop *loop_nest)
3503 {
3504 unsigned int i;
3505 tree last_conflicts;
3506 struct subscript *subscript;
3507 tree res = NULL_TREE;
3508
3509 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3510 i++)
3511 {
3512 conflict_function *overlaps_a, *overlaps_b;
3513
3514 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3515 DR_ACCESS_FN (drb, i),
3516 &overlaps_a, &overlaps_b,
3517 &last_conflicts, loop_nest);
3518
3519 if (SUB_CONFLICTS_IN_A (subscript))
3520 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3521 if (SUB_CONFLICTS_IN_B (subscript))
3522 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3523
3524 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3525 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3526 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3527
3528 /* If there is any undetermined conflict function we have to
3529 give a conservative answer in case we cannot prove that
3530 no dependence exists when analyzing another subscript. */
3531 if (CF_NOT_KNOWN_P (overlaps_a)
3532 || CF_NOT_KNOWN_P (overlaps_b))
3533 {
3534 res = chrec_dont_know;
3535 continue;
3536 }
3537
3538 /* When there is a subscript with no dependence we can stop. */
3539 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3540 || CF_NO_DEPENDENCE_P (overlaps_b))
3541 {
3542 res = chrec_known;
3543 break;
3544 }
3545 }
3546
3547 if (res == NULL_TREE)
3548 return true;
3549
3550 if (res == chrec_known)
3551 dependence_stats.num_dependence_independent++;
3552 else
3553 dependence_stats.num_dependence_undetermined++;
3554 finalize_ddr_dependent (ddr, res);
3555 return false;
3556 }
3557
3558 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3559
3560 static void
3561 subscript_dependence_tester (struct data_dependence_relation *ddr,
3562 struct loop *loop_nest)
3563 {
3564
3565 if (dump_file && (dump_flags & TDF_DETAILS))
3566 fprintf (dump_file, "(subscript_dependence_tester \n");
3567
3568 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3569 dependence_stats.num_dependence_dependent++;
3570
3571 compute_subscript_distance (ddr);
3572 if (build_classic_dist_vector (ddr, loop_nest))
3573 build_classic_dir_vector (ddr);
3574
3575 if (dump_file && (dump_flags & TDF_DETAILS))
3576 fprintf (dump_file, ")\n");
3577 }
3578
3579 /* Returns true when all the access functions of A are affine or
3580 constant with respect to LOOP_NEST. */
3581
3582 static bool
3583 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3584 const struct loop *loop_nest)
3585 {
3586 unsigned int i;
3587 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3588 tree t;
3589
3590 FOR_EACH_VEC_ELT (tree, fns, i, t)
3591 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3592 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3593 return false;
3594
3595 return true;
3596 }
3597
3598 /* Initializes an equation for an OMEGA problem using the information
3599 contained in the ACCESS_FUN. Returns true when the operation
3600 succeeded.
3601
3602 PB is the omega constraint system.
3603 EQ is the number of the equation to be initialized.
3604 OFFSET is used for shifting the variables names in the constraints:
3605 a constrain is composed of 2 * the number of variables surrounding
3606 dependence accesses. OFFSET is set either to 0 for the first n variables,
3607 then it is set to n.
3608 ACCESS_FUN is expected to be an affine chrec. */
3609
3610 static bool
3611 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3612 unsigned int offset, tree access_fun,
3613 struct data_dependence_relation *ddr)
3614 {
3615 switch (TREE_CODE (access_fun))
3616 {
3617 case POLYNOMIAL_CHREC:
3618 {
3619 tree left = CHREC_LEFT (access_fun);
3620 tree right = CHREC_RIGHT (access_fun);
3621 int var = CHREC_VARIABLE (access_fun);
3622 unsigned var_idx;
3623
3624 if (TREE_CODE (right) != INTEGER_CST)
3625 return false;
3626
3627 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3628 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3629
3630 /* Compute the innermost loop index. */
3631 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3632
3633 if (offset == 0)
3634 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3635 += int_cst_value (right);
3636
3637 switch (TREE_CODE (left))
3638 {
3639 case POLYNOMIAL_CHREC:
3640 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3641
3642 case INTEGER_CST:
3643 pb->eqs[eq].coef[0] += int_cst_value (left);
3644 return true;
3645
3646 default:
3647 return false;
3648 }
3649 }
3650
3651 case INTEGER_CST:
3652 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3653 return true;
3654
3655 default:
3656 return false;
3657 }
3658 }
3659
3660 /* As explained in the comments preceding init_omega_for_ddr, we have
3661 to set up a system for each loop level, setting outer loops
3662 variation to zero, and current loop variation to positive or zero.
3663 Save each lexico positive distance vector. */
3664
3665 static void
3666 omega_extract_distance_vectors (omega_pb pb,
3667 struct data_dependence_relation *ddr)
3668 {
3669 int eq, geq;
3670 unsigned i, j;
3671 struct loop *loopi, *loopj;
3672 enum omega_result res;
3673
3674 /* Set a new problem for each loop in the nest. The basis is the
3675 problem that we have initialized until now. On top of this we
3676 add new constraints. */
3677 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3678 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3679 {
3680 int dist = 0;
3681 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3682 DDR_NB_LOOPS (ddr));
3683
3684 omega_copy_problem (copy, pb);
3685
3686 /* For all the outer loops "loop_j", add "dj = 0". */
3687 for (j = 0;
3688 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3689 {
3690 eq = omega_add_zero_eq (copy, omega_black);
3691 copy->eqs[eq].coef[j + 1] = 1;
3692 }
3693
3694 /* For "loop_i", add "0 <= di". */
3695 geq = omega_add_zero_geq (copy, omega_black);
3696 copy->geqs[geq].coef[i + 1] = 1;
3697
3698 /* Reduce the constraint system, and test that the current
3699 problem is feasible. */
3700 res = omega_simplify_problem (copy);
3701 if (res == omega_false
3702 || res == omega_unknown
3703 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3704 goto next_problem;
3705
3706 for (eq = 0; eq < copy->num_subs; eq++)
3707 if (copy->subs[eq].key == (int) i + 1)
3708 {
3709 dist = copy->subs[eq].coef[0];
3710 goto found_dist;
3711 }
3712
3713 if (dist == 0)
3714 {
3715 /* Reinitialize problem... */
3716 omega_copy_problem (copy, pb);
3717 for (j = 0;
3718 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3719 {
3720 eq = omega_add_zero_eq (copy, omega_black);
3721 copy->eqs[eq].coef[j + 1] = 1;
3722 }
3723
3724 /* ..., but this time "di = 1". */
3725 eq = omega_add_zero_eq (copy, omega_black);
3726 copy->eqs[eq].coef[i + 1] = 1;
3727 copy->eqs[eq].coef[0] = -1;
3728
3729 res = omega_simplify_problem (copy);
3730 if (res == omega_false
3731 || res == omega_unknown
3732 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3733 goto next_problem;
3734
3735 for (eq = 0; eq < copy->num_subs; eq++)
3736 if (copy->subs[eq].key == (int) i + 1)
3737 {
3738 dist = copy->subs[eq].coef[0];
3739 goto found_dist;
3740 }
3741 }
3742
3743 found_dist:;
3744 /* Save the lexicographically positive distance vector. */
3745 if (dist >= 0)
3746 {
3747 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3748 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3749
3750 dist_v[i] = dist;
3751
3752 for (eq = 0; eq < copy->num_subs; eq++)
3753 if (copy->subs[eq].key > 0)
3754 {
3755 dist = copy->subs[eq].coef[0];
3756 dist_v[copy->subs[eq].key - 1] = dist;
3757 }
3758
3759 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3760 dir_v[j] = dir_from_dist (dist_v[j]);
3761
3762 save_dist_v (ddr, dist_v);
3763 save_dir_v (ddr, dir_v);
3764 }
3765
3766 next_problem:;
3767 omega_free_problem (copy);
3768 }
3769 }
3770
3771 /* This is called for each subscript of a tuple of data references:
3772 insert an equality for representing the conflicts. */
3773
3774 static bool
3775 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3776 struct data_dependence_relation *ddr,
3777 omega_pb pb, bool *maybe_dependent)
3778 {
3779 int eq;
3780 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3781 TREE_TYPE (access_fun_b));
3782 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3783 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3784 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3785 tree minus_one;
3786
3787 /* When the fun_a - fun_b is not constant, the dependence is not
3788 captured by the classic distance vector representation. */
3789 if (TREE_CODE (difference) != INTEGER_CST)
3790 return false;
3791
3792 /* ZIV test. */
3793 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3794 {
3795 /* There is no dependence. */
3796 *maybe_dependent = false;
3797 return true;
3798 }
3799
3800 minus_one = build_int_cst (type, -1);
3801 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3802
3803 eq = omega_add_zero_eq (pb, omega_black);
3804 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3805 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3806 /* There is probably a dependence, but the system of
3807 constraints cannot be built: answer "don't know". */
3808 return false;
3809
3810 /* GCD test. */
3811 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3812 && !int_divides_p (lambda_vector_gcd
3813 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3814 2 * DDR_NB_LOOPS (ddr)),
3815 pb->eqs[eq].coef[0]))
3816 {
3817 /* There is no dependence. */
3818 *maybe_dependent = false;
3819 return true;
3820 }
3821
3822 return true;
3823 }
3824
3825 /* Helper function, same as init_omega_for_ddr but specialized for
3826 data references A and B. */
3827
3828 static bool
3829 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3830 struct data_dependence_relation *ddr,
3831 omega_pb pb, bool *maybe_dependent)
3832 {
3833 unsigned i;
3834 int ineq;
3835 struct loop *loopi;
3836 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3837
3838 /* Insert an equality per subscript. */
3839 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3840 {
3841 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3842 ddr, pb, maybe_dependent))
3843 return false;
3844 else if (*maybe_dependent == false)
3845 {
3846 /* There is no dependence. */
3847 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3848 return true;
3849 }
3850 }
3851
3852 /* Insert inequalities: constraints corresponding to the iteration
3853 domain, i.e. the loops surrounding the references "loop_x" and
3854 the distance variables "dx". The layout of the OMEGA
3855 representation is as follows:
3856 - coef[0] is the constant
3857 - coef[1..nb_loops] are the protected variables that will not be
3858 removed by the solver: the "dx"
3859 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3860 */
3861 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3862 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3863 {
3864 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3865
3866 /* 0 <= loop_x */
3867 ineq = omega_add_zero_geq (pb, omega_black);
3868 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3869
3870 /* 0 <= loop_x + dx */
3871 ineq = omega_add_zero_geq (pb, omega_black);
3872 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3873 pb->geqs[ineq].coef[i + 1] = 1;
3874
3875 if (nbi != -1)
3876 {
3877 /* loop_x <= nb_iters */
3878 ineq = omega_add_zero_geq (pb, omega_black);
3879 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3880 pb->geqs[ineq].coef[0] = nbi;
3881
3882 /* loop_x + dx <= nb_iters */
3883 ineq = omega_add_zero_geq (pb, omega_black);
3884 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3885 pb->geqs[ineq].coef[i + 1] = -1;
3886 pb->geqs[ineq].coef[0] = nbi;
3887
3888 /* A step "dx" bigger than nb_iters is not feasible, so
3889 add "0 <= nb_iters + dx", */
3890 ineq = omega_add_zero_geq (pb, omega_black);
3891 pb->geqs[ineq].coef[i + 1] = 1;
3892 pb->geqs[ineq].coef[0] = nbi;
3893 /* and "dx <= nb_iters". */
3894 ineq = omega_add_zero_geq (pb, omega_black);
3895 pb->geqs[ineq].coef[i + 1] = -1;
3896 pb->geqs[ineq].coef[0] = nbi;
3897 }
3898 }
3899
3900 omega_extract_distance_vectors (pb, ddr);
3901
3902 return true;
3903 }
3904
3905 /* Sets up the Omega dependence problem for the data dependence
3906 relation DDR. Returns false when the constraint system cannot be
3907 built, ie. when the test answers "don't know". Returns true
3908 otherwise, and when independence has been proved (using one of the
3909 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3910 set MAYBE_DEPENDENT to true.
3911
3912 Example: for setting up the dependence system corresponding to the
3913 conflicting accesses
3914
3915 | loop_i
3916 | loop_j
3917 | A[i, i+1] = ...
3918 | ... A[2*j, 2*(i + j)]
3919 | endloop_j
3920 | endloop_i
3921
3922 the following constraints come from the iteration domain:
3923
3924 0 <= i <= Ni
3925 0 <= i + di <= Ni
3926 0 <= j <= Nj
3927 0 <= j + dj <= Nj
3928
3929 where di, dj are the distance variables. The constraints
3930 representing the conflicting elements are:
3931
3932 i = 2 * (j + dj)
3933 i + 1 = 2 * (i + di + j + dj)
3934
3935 For asking that the resulting distance vector (di, dj) be
3936 lexicographically positive, we insert the constraint "di >= 0". If
3937 "di = 0" in the solution, we fix that component to zero, and we
3938 look at the inner loops: we set a new problem where all the outer
3939 loop distances are zero, and fix this inner component to be
3940 positive. When one of the components is positive, we save that
3941 distance, and set a new problem where the distance on this loop is
3942 zero, searching for other distances in the inner loops. Here is
3943 the classic example that illustrates that we have to set for each
3944 inner loop a new problem:
3945
3946 | loop_1
3947 | loop_2
3948 | A[10]
3949 | endloop_2
3950 | endloop_1
3951
3952 we have to save two distances (1, 0) and (0, 1).
3953
3954 Given two array references, refA and refB, we have to set the
3955 dependence problem twice, refA vs. refB and refB vs. refA, and we
3956 cannot do a single test, as refB might occur before refA in the
3957 inner loops, and the contrary when considering outer loops: ex.
3958
3959 | loop_0
3960 | loop_1
3961 | loop_2
3962 | T[{1,+,1}_2][{1,+,1}_1] // refA
3963 | T[{2,+,1}_2][{0,+,1}_1] // refB
3964 | endloop_2
3965 | endloop_1
3966 | endloop_0
3967
3968 refB touches the elements in T before refA, and thus for the same
3969 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3970 but for successive loop_0 iterations, we have (1, -1, 1)
3971
3972 The Omega solver expects the distance variables ("di" in the
3973 previous example) to come first in the constraint system (as
3974 variables to be protected, or "safe" variables), the constraint
3975 system is built using the following layout:
3976
3977 "cst | distance vars | index vars".
3978 */
3979
3980 static bool
3981 init_omega_for_ddr (struct data_dependence_relation *ddr,
3982 bool *maybe_dependent)
3983 {
3984 omega_pb pb;
3985 bool res = false;
3986
3987 *maybe_dependent = true;
3988
3989 if (same_access_functions (ddr))
3990 {
3991 unsigned j;
3992 lambda_vector dir_v;
3993
3994 /* Save the 0 vector. */
3995 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3996 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3997 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3998 dir_v[j] = dir_equal;
3999 save_dir_v (ddr, dir_v);
4000
4001 /* Save the dependences carried by outer loops. */
4002 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4003 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4004 maybe_dependent);
4005 omega_free_problem (pb);
4006 return res;
4007 }
4008
4009 /* Omega expects the protected variables (those that have to be kept
4010 after elimination) to appear first in the constraint system.
4011 These variables are the distance variables. In the following
4012 initialization we declare NB_LOOPS safe variables, and the total
4013 number of variables for the constraint system is 2*NB_LOOPS. */
4014 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4015 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4016 maybe_dependent);
4017 omega_free_problem (pb);
4018
4019 /* Stop computation if not decidable, or no dependence. */
4020 if (res == false || *maybe_dependent == false)
4021 return res;
4022
4023 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4024 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4025 maybe_dependent);
4026 omega_free_problem (pb);
4027
4028 return res;
4029 }
4030
4031 /* Return true when DDR contains the same information as that stored
4032 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4033
4034 static bool
4035 ddr_consistent_p (FILE *file,
4036 struct data_dependence_relation *ddr,
4037 VEC (lambda_vector, heap) *dist_vects,
4038 VEC (lambda_vector, heap) *dir_vects)
4039 {
4040 unsigned int i, j;
4041
4042 /* If dump_file is set, output there. */
4043 if (dump_file && (dump_flags & TDF_DETAILS))
4044 file = dump_file;
4045
4046 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
4047 {
4048 lambda_vector b_dist_v;
4049 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4050 VEC_length (lambda_vector, dist_vects),
4051 DDR_NUM_DIST_VECTS (ddr));
4052
4053 fprintf (file, "Banerjee dist vectors:\n");
4054 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
4055 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4056
4057 fprintf (file, "Omega dist vectors:\n");
4058 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4059 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4060
4061 fprintf (file, "data dependence relation:\n");
4062 dump_data_dependence_relation (file, ddr);
4063
4064 fprintf (file, ")\n");
4065 return false;
4066 }
4067
4068 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
4069 {
4070 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4071 VEC_length (lambda_vector, dir_vects),
4072 DDR_NUM_DIR_VECTS (ddr));
4073 return false;
4074 }
4075
4076 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4077 {
4078 lambda_vector a_dist_v;
4079 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4080
4081 /* Distance vectors are not ordered in the same way in the DDR
4082 and in the DIST_VECTS: search for a matching vector. */
4083 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
4084 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4085 break;
4086
4087 if (j == VEC_length (lambda_vector, dist_vects))
4088 {
4089 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4090 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4091 fprintf (file, "not found in Omega dist vectors:\n");
4092 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4093 fprintf (file, "data dependence relation:\n");
4094 dump_data_dependence_relation (file, ddr);
4095 fprintf (file, ")\n");
4096 }
4097 }
4098
4099 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4100 {
4101 lambda_vector a_dir_v;
4102 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4103
4104 /* Direction vectors are not ordered in the same way in the DDR
4105 and in the DIR_VECTS: search for a matching vector. */
4106 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4107 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4108 break;
4109
4110 if (j == VEC_length (lambda_vector, dist_vects))
4111 {
4112 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4113 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4114 fprintf (file, "not found in Omega dir vectors:\n");
4115 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4116 fprintf (file, "data dependence relation:\n");
4117 dump_data_dependence_relation (file, ddr);
4118 fprintf (file, ")\n");
4119 }
4120 }
4121
4122 return true;
4123 }
4124
4125 /* This computes the affine dependence relation between A and B with
4126 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4127 independence between two accesses, while CHREC_DONT_KNOW is used
4128 for representing the unknown relation.
4129
4130 Note that it is possible to stop the computation of the dependence
4131 relation the first time we detect a CHREC_KNOWN element for a given
4132 subscript. */
4133
4134 static void
4135 compute_affine_dependence (struct data_dependence_relation *ddr,
4136 struct loop *loop_nest)
4137 {
4138 struct data_reference *dra = DDR_A (ddr);
4139 struct data_reference *drb = DDR_B (ddr);
4140
4141 if (dump_file && (dump_flags & TDF_DETAILS))
4142 {
4143 fprintf (dump_file, "(compute_affine_dependence\n");
4144 fprintf (dump_file, " stmt_a: ");
4145 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4146 fprintf (dump_file, " stmt_b: ");
4147 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4148 }
4149
4150 /* Analyze only when the dependence relation is not yet known. */
4151 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4152 {
4153 dependence_stats.num_dependence_tests++;
4154
4155 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4156 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4157 {
4158 if (flag_check_data_deps)
4159 {
4160 /* Compute the dependences using the first algorithm. */
4161 subscript_dependence_tester (ddr, loop_nest);
4162
4163 if (dump_file && (dump_flags & TDF_DETAILS))
4164 {
4165 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4166 dump_data_dependence_relation (dump_file, ddr);
4167 }
4168
4169 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4170 {
4171 bool maybe_dependent;
4172 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4173
4174 /* Save the result of the first DD analyzer. */
4175 dist_vects = DDR_DIST_VECTS (ddr);
4176 dir_vects = DDR_DIR_VECTS (ddr);
4177
4178 /* Reset the information. */
4179 DDR_DIST_VECTS (ddr) = NULL;
4180 DDR_DIR_VECTS (ddr) = NULL;
4181
4182 /* Compute the same information using Omega. */
4183 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4184 goto csys_dont_know;
4185
4186 if (dump_file && (dump_flags & TDF_DETAILS))
4187 {
4188 fprintf (dump_file, "Omega Analyzer\n");
4189 dump_data_dependence_relation (dump_file, ddr);
4190 }
4191
4192 /* Check that we get the same information. */
4193 if (maybe_dependent)
4194 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4195 dir_vects));
4196 }
4197 }
4198 else
4199 subscript_dependence_tester (ddr, loop_nest);
4200 }
4201
4202 /* As a last case, if the dependence cannot be determined, or if
4203 the dependence is considered too difficult to determine, answer
4204 "don't know". */
4205 else
4206 {
4207 csys_dont_know:;
4208 dependence_stats.num_dependence_undetermined++;
4209
4210 if (dump_file && (dump_flags & TDF_DETAILS))
4211 {
4212 fprintf (dump_file, "Data ref a:\n");
4213 dump_data_reference (dump_file, dra);
4214 fprintf (dump_file, "Data ref b:\n");
4215 dump_data_reference (dump_file, drb);
4216 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4217 }
4218 finalize_ddr_dependent (ddr, chrec_dont_know);
4219 }
4220 }
4221
4222 if (dump_file && (dump_flags & TDF_DETAILS))
4223 {
4224 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4225 fprintf (dump_file, ") -> no dependence\n");
4226 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4227 fprintf (dump_file, ") -> dependence analysis failed\n");
4228 else
4229 fprintf (dump_file, ")\n");
4230 }
4231 }
4232
4233 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4234 the data references in DATAREFS, in the LOOP_NEST. When
4235 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4236 relations. Return true when successful, i.e. data references number
4237 is small enough to be handled. */
4238
4239 bool
4240 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4241 VEC (ddr_p, heap) **dependence_relations,
4242 VEC (loop_p, heap) *loop_nest,
4243 bool compute_self_and_rr)
4244 {
4245 struct data_dependence_relation *ddr;
4246 struct data_reference *a, *b;
4247 unsigned int i, j;
4248
4249 if ((int) VEC_length (data_reference_p, datarefs)
4250 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4251 {
4252 struct data_dependence_relation *ddr;
4253
4254 /* Insert a single relation into dependence_relations:
4255 chrec_dont_know. */
4256 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4257 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4258 return false;
4259 }
4260
4261 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4262 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4263 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4264 {
4265 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4266 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4267 if (loop_nest)
4268 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4269 }
4270
4271 if (compute_self_and_rr)
4272 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4273 {
4274 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4275 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4276 if (loop_nest)
4277 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4278 }
4279
4280 return true;
4281 }
4282
4283 /* Describes a location of a memory reference. */
4284
4285 typedef struct data_ref_loc_d
4286 {
4287 /* Position of the memory reference. */
4288 tree *pos;
4289
4290 /* True if the memory reference is read. */
4291 bool is_read;
4292 } data_ref_loc;
4293
4294 DEF_VEC_O (data_ref_loc);
4295 DEF_VEC_ALLOC_O (data_ref_loc, heap);
4296
4297 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4298 true if STMT clobbers memory, false otherwise. */
4299
4300 static bool
4301 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4302 {
4303 bool clobbers_memory = false;
4304 data_ref_loc *ref;
4305 tree *op0, *op1;
4306 enum gimple_code stmt_code = gimple_code (stmt);
4307
4308 *references = NULL;
4309
4310 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4311 Calls have side-effects, except those to const or pure
4312 functions. */
4313 if ((stmt_code == GIMPLE_CALL
4314 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4315 || (stmt_code == GIMPLE_ASM
4316 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4317 clobbers_memory = true;
4318
4319 if (!gimple_vuse (stmt))
4320 return clobbers_memory;
4321
4322 if (stmt_code == GIMPLE_ASSIGN)
4323 {
4324 tree base;
4325 op0 = gimple_assign_lhs_ptr (stmt);
4326 op1 = gimple_assign_rhs1_ptr (stmt);
4327
4328 if (DECL_P (*op1)
4329 || (REFERENCE_CLASS_P (*op1)
4330 && (base = get_base_address (*op1))
4331 && TREE_CODE (base) != SSA_NAME))
4332 {
4333 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4334 ref->pos = op1;
4335 ref->is_read = true;
4336 }
4337 }
4338 else if (stmt_code == GIMPLE_CALL)
4339 {
4340 unsigned i, n;
4341
4342 op0 = gimple_call_lhs_ptr (stmt);
4343 n = gimple_call_num_args (stmt);
4344 for (i = 0; i < n; i++)
4345 {
4346 op1 = gimple_call_arg_ptr (stmt, i);
4347
4348 if (DECL_P (*op1)
4349 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4350 {
4351 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4352 ref->pos = op1;
4353 ref->is_read = true;
4354 }
4355 }
4356 }
4357 else
4358 return clobbers_memory;
4359
4360 if (*op0
4361 && (DECL_P (*op0)
4362 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4363 {
4364 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4365 ref->pos = op0;
4366 ref->is_read = false;
4367 }
4368 return clobbers_memory;
4369 }
4370
4371 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4372 reference, returns false, otherwise returns true. NEST is the outermost
4373 loop of the loop nest in which the references should be analyzed. */
4374
4375 bool
4376 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4377 VEC (data_reference_p, heap) **datarefs)
4378 {
4379 unsigned i;
4380 VEC (data_ref_loc, heap) *references;
4381 data_ref_loc *ref;
4382 bool ret = true;
4383 data_reference_p dr;
4384
4385 if (get_references_in_stmt (stmt, &references))
4386 {
4387 VEC_free (data_ref_loc, heap, references);
4388 return false;
4389 }
4390
4391 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4392 {
4393 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4394 *ref->pos, stmt, ref->is_read);
4395 gcc_assert (dr != NULL);
4396 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4397 }
4398 VEC_free (data_ref_loc, heap, references);
4399 return ret;
4400 }
4401
4402 /* Stores the data references in STMT to DATAREFS. If there is an
4403 unanalyzable reference, returns false, otherwise returns true.
4404 NEST is the outermost loop of the loop nest in which the references
4405 should be instantiated, LOOP is the loop in which the references
4406 should be analyzed. */
4407
4408 bool
4409 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4410 VEC (data_reference_p, heap) **datarefs)
4411 {
4412 unsigned i;
4413 VEC (data_ref_loc, heap) *references;
4414 data_ref_loc *ref;
4415 bool ret = true;
4416 data_reference_p dr;
4417
4418 if (get_references_in_stmt (stmt, &references))
4419 {
4420 VEC_free (data_ref_loc, heap, references);
4421 return false;
4422 }
4423
4424 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4425 {
4426 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4427 gcc_assert (dr != NULL);
4428 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4429 }
4430
4431 VEC_free (data_ref_loc, heap, references);
4432 return ret;
4433 }
4434
4435 /* Search the data references in LOOP, and record the information into
4436 DATAREFS. Returns chrec_dont_know when failing to analyze a
4437 difficult case, returns NULL_TREE otherwise. */
4438
4439 tree
4440 find_data_references_in_bb (struct loop *loop, basic_block bb,
4441 VEC (data_reference_p, heap) **datarefs)
4442 {
4443 gimple_stmt_iterator bsi;
4444
4445 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4446 {
4447 gimple stmt = gsi_stmt (bsi);
4448
4449 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4450 {
4451 struct data_reference *res;
4452 res = XCNEW (struct data_reference);
4453 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4454
4455 return chrec_dont_know;
4456 }
4457 }
4458
4459 return NULL_TREE;
4460 }
4461
4462 /* Search the data references in LOOP, and record the information into
4463 DATAREFS. Returns chrec_dont_know when failing to analyze a
4464 difficult case, returns NULL_TREE otherwise.
4465
4466 TODO: This function should be made smarter so that it can handle address
4467 arithmetic as if they were array accesses, etc. */
4468
4469 static tree
4470 find_data_references_in_loop (struct loop *loop,
4471 VEC (data_reference_p, heap) **datarefs)
4472 {
4473 basic_block bb, *bbs;
4474 unsigned int i;
4475
4476 bbs = get_loop_body_in_dom_order (loop);
4477
4478 for (i = 0; i < loop->num_nodes; i++)
4479 {
4480 bb = bbs[i];
4481
4482 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4483 {
4484 free (bbs);
4485 return chrec_dont_know;
4486 }
4487 }
4488 free (bbs);
4489
4490 return NULL_TREE;
4491 }
4492
4493 /* Recursive helper function. */
4494
4495 static bool
4496 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4497 {
4498 /* Inner loops of the nest should not contain siblings. Example:
4499 when there are two consecutive loops,
4500
4501 | loop_0
4502 | loop_1
4503 | A[{0, +, 1}_1]
4504 | endloop_1
4505 | loop_2
4506 | A[{0, +, 1}_2]
4507 | endloop_2
4508 | endloop_0
4509
4510 the dependence relation cannot be captured by the distance
4511 abstraction. */
4512 if (loop->next)
4513 return false;
4514
4515 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4516 if (loop->inner)
4517 return find_loop_nest_1 (loop->inner, loop_nest);
4518 return true;
4519 }
4520
4521 /* Return false when the LOOP is not well nested. Otherwise return
4522 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4523 contain the loops from the outermost to the innermost, as they will
4524 appear in the classic distance vector. */
4525
4526 bool
4527 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4528 {
4529 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4530 if (loop->inner)
4531 return find_loop_nest_1 (loop->inner, loop_nest);
4532 return true;
4533 }
4534
4535 /* Returns true when the data dependences have been computed, false otherwise.
4536 Given a loop nest LOOP, the following vectors are returned:
4537 DATAREFS is initialized to all the array elements contained in this loop,
4538 DEPENDENCE_RELATIONS contains the relations between the data references.
4539 Compute read-read and self relations if
4540 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4541
4542 bool
4543 compute_data_dependences_for_loop (struct loop *loop,
4544 bool compute_self_and_read_read_dependences,
4545 VEC (loop_p, heap) **loop_nest,
4546 VEC (data_reference_p, heap) **datarefs,
4547 VEC (ddr_p, heap) **dependence_relations)
4548 {
4549 bool res = true;
4550
4551 memset (&dependence_stats, 0, sizeof (dependence_stats));
4552
4553 /* If the loop nest is not well formed, or one of the data references
4554 is not computable, give up without spending time to compute other
4555 dependences. */
4556 if (!loop
4557 || !find_loop_nest (loop, loop_nest)
4558 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4559 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4560 compute_self_and_read_read_dependences))
4561 res = false;
4562
4563 if (dump_file && (dump_flags & TDF_STATS))
4564 {
4565 fprintf (dump_file, "Dependence tester statistics:\n");
4566
4567 fprintf (dump_file, "Number of dependence tests: %d\n",
4568 dependence_stats.num_dependence_tests);
4569 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4570 dependence_stats.num_dependence_dependent);
4571 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4572 dependence_stats.num_dependence_independent);
4573 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4574 dependence_stats.num_dependence_undetermined);
4575
4576 fprintf (dump_file, "Number of subscript tests: %d\n",
4577 dependence_stats.num_subscript_tests);
4578 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4579 dependence_stats.num_subscript_undetermined);
4580 fprintf (dump_file, "Number of same subscript function: %d\n",
4581 dependence_stats.num_same_subscript_function);
4582
4583 fprintf (dump_file, "Number of ziv tests: %d\n",
4584 dependence_stats.num_ziv);
4585 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4586 dependence_stats.num_ziv_dependent);
4587 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4588 dependence_stats.num_ziv_independent);
4589 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4590 dependence_stats.num_ziv_unimplemented);
4591
4592 fprintf (dump_file, "Number of siv tests: %d\n",
4593 dependence_stats.num_siv);
4594 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4595 dependence_stats.num_siv_dependent);
4596 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4597 dependence_stats.num_siv_independent);
4598 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4599 dependence_stats.num_siv_unimplemented);
4600
4601 fprintf (dump_file, "Number of miv tests: %d\n",
4602 dependence_stats.num_miv);
4603 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4604 dependence_stats.num_miv_dependent);
4605 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4606 dependence_stats.num_miv_independent);
4607 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4608 dependence_stats.num_miv_unimplemented);
4609 }
4610
4611 return res;
4612 }
4613
4614 /* Returns true when the data dependences for the basic block BB have been
4615 computed, false otherwise.
4616 DATAREFS is initialized to all the array elements contained in this basic
4617 block, DEPENDENCE_RELATIONS contains the relations between the data
4618 references. Compute read-read and self relations if
4619 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4620 bool
4621 compute_data_dependences_for_bb (basic_block bb,
4622 bool compute_self_and_read_read_dependences,
4623 VEC (data_reference_p, heap) **datarefs,
4624 VEC (ddr_p, heap) **dependence_relations)
4625 {
4626 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4627 return false;
4628
4629 return compute_all_dependences (*datarefs, dependence_relations, NULL,
4630 compute_self_and_read_read_dependences);
4631 }
4632
4633 /* Entry point (for testing only). Analyze all the data references
4634 and the dependence relations in LOOP.
4635
4636 The data references are computed first.
4637
4638 A relation on these nodes is represented by a complete graph. Some
4639 of the relations could be of no interest, thus the relations can be
4640 computed on demand.
4641
4642 In the following function we compute all the relations. This is
4643 just a first implementation that is here for:
4644 - for showing how to ask for the dependence relations,
4645 - for the debugging the whole dependence graph,
4646 - for the dejagnu testcases and maintenance.
4647
4648 It is possible to ask only for a part of the graph, avoiding to
4649 compute the whole dependence graph. The computed dependences are
4650 stored in a knowledge base (KB) such that later queries don't
4651 recompute the same information. The implementation of this KB is
4652 transparent to the optimizer, and thus the KB can be changed with a
4653 more efficient implementation, or the KB could be disabled. */
4654 static void
4655 analyze_all_data_dependences (struct loop *loop)
4656 {
4657 unsigned int i;
4658 int nb_data_refs = 10;
4659 VEC (data_reference_p, heap) *datarefs =
4660 VEC_alloc (data_reference_p, heap, nb_data_refs);
4661 VEC (ddr_p, heap) *dependence_relations =
4662 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4663 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4664
4665 /* Compute DDs on the whole function. */
4666 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4667 &dependence_relations);
4668
4669 if (dump_file)
4670 {
4671 dump_data_dependence_relations (dump_file, dependence_relations);
4672 fprintf (dump_file, "\n\n");
4673
4674 if (dump_flags & TDF_DETAILS)
4675 dump_dist_dir_vectors (dump_file, dependence_relations);
4676
4677 if (dump_flags & TDF_STATS)
4678 {
4679 unsigned nb_top_relations = 0;
4680 unsigned nb_bot_relations = 0;
4681 unsigned nb_chrec_relations = 0;
4682 struct data_dependence_relation *ddr;
4683
4684 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4685 {
4686 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4687 nb_top_relations++;
4688
4689 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4690 nb_bot_relations++;
4691
4692 else
4693 nb_chrec_relations++;
4694 }
4695
4696 gather_stats_on_scev_database ();
4697 }
4698 }
4699
4700 VEC_free (loop_p, heap, loop_nest);
4701 free_dependence_relations (dependence_relations);
4702 free_data_refs (datarefs);
4703 }
4704
4705 /* Computes all the data dependences and check that the results of
4706 several analyzers are the same. */
4707
4708 void
4709 tree_check_data_deps (void)
4710 {
4711 loop_iterator li;
4712 struct loop *loop_nest;
4713
4714 FOR_EACH_LOOP (li, loop_nest, 0)
4715 analyze_all_data_dependences (loop_nest);
4716 }
4717
4718 /* Free the memory used by a data dependence relation DDR. */
4719
4720 void
4721 free_dependence_relation (struct data_dependence_relation *ddr)
4722 {
4723 if (ddr == NULL)
4724 return;
4725
4726 if (DDR_SUBSCRIPTS (ddr))
4727 free_subscripts (DDR_SUBSCRIPTS (ddr));
4728 if (DDR_DIST_VECTS (ddr))
4729 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4730 if (DDR_DIR_VECTS (ddr))
4731 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4732
4733 free (ddr);
4734 }
4735
4736 /* Free the memory used by the data dependence relations from
4737 DEPENDENCE_RELATIONS. */
4738
4739 void
4740 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4741 {
4742 unsigned int i;
4743 struct data_dependence_relation *ddr;
4744
4745 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4746 if (ddr)
4747 free_dependence_relation (ddr);
4748
4749 VEC_free (ddr_p, heap, dependence_relations);
4750 }
4751
4752 /* Free the memory used by the data references from DATAREFS. */
4753
4754 void
4755 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4756 {
4757 unsigned int i;
4758 struct data_reference *dr;
4759
4760 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4761 free_data_ref (dr);
4762 VEC_free (data_reference_p, heap, datarefs);
4763 }
4764
4765 \f
4766
4767 /* Dump vertex I in RDG to FILE. */
4768
4769 static void
4770 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4771 {
4772 struct vertex *v = &(rdg->vertices[i]);
4773 struct graph_edge *e;
4774
4775 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4776 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4777 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4778
4779 if (v->pred)
4780 for (e = v->pred; e; e = e->pred_next)
4781 fprintf (file, " %d", e->src);
4782
4783 fprintf (file, ") (out:");
4784
4785 if (v->succ)
4786 for (e = v->succ; e; e = e->succ_next)
4787 fprintf (file, " %d", e->dest);
4788
4789 fprintf (file, ")\n");
4790 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4791 fprintf (file, ")\n");
4792 }
4793
4794 /* Call dump_rdg_vertex on stderr. */
4795
4796 DEBUG_FUNCTION void
4797 debug_rdg_vertex (struct graph *rdg, int i)
4798 {
4799 dump_rdg_vertex (stderr, rdg, i);
4800 }
4801
4802 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4803 dumped vertices to that bitmap. */
4804
4805 static void
4806 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4807 {
4808 int i;
4809
4810 fprintf (file, "(%d\n", c);
4811
4812 for (i = 0; i < rdg->n_vertices; i++)
4813 if (rdg->vertices[i].component == c)
4814 {
4815 if (dumped)
4816 bitmap_set_bit (dumped, i);
4817
4818 dump_rdg_vertex (file, rdg, i);
4819 }
4820
4821 fprintf (file, ")\n");
4822 }
4823
4824 /* Call dump_rdg_vertex on stderr. */
4825
4826 DEBUG_FUNCTION void
4827 debug_rdg_component (struct graph *rdg, int c)
4828 {
4829 dump_rdg_component (stderr, rdg, c, NULL);
4830 }
4831
4832 /* Dump the reduced dependence graph RDG to FILE. */
4833
4834 void
4835 dump_rdg (FILE *file, struct graph *rdg)
4836 {
4837 int i;
4838 bitmap dumped = BITMAP_ALLOC (NULL);
4839
4840 fprintf (file, "(rdg\n");
4841
4842 for (i = 0; i < rdg->n_vertices; i++)
4843 if (!bitmap_bit_p (dumped, i))
4844 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4845
4846 fprintf (file, ")\n");
4847 BITMAP_FREE (dumped);
4848 }
4849
4850 /* Call dump_rdg on stderr. */
4851
4852 DEBUG_FUNCTION void
4853 debug_rdg (struct graph *rdg)
4854 {
4855 dump_rdg (stderr, rdg);
4856 }
4857
4858 static void
4859 dot_rdg_1 (FILE *file, struct graph *rdg)
4860 {
4861 int i;
4862
4863 fprintf (file, "digraph RDG {\n");
4864
4865 for (i = 0; i < rdg->n_vertices; i++)
4866 {
4867 struct vertex *v = &(rdg->vertices[i]);
4868 struct graph_edge *e;
4869
4870 /* Highlight reads from memory. */
4871 if (RDG_MEM_READS_STMT (rdg, i))
4872 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4873
4874 /* Highlight stores to memory. */
4875 if (RDG_MEM_WRITE_STMT (rdg, i))
4876 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4877
4878 if (v->succ)
4879 for (e = v->succ; e; e = e->succ_next)
4880 switch (RDGE_TYPE (e))
4881 {
4882 case input_dd:
4883 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4884 break;
4885
4886 case output_dd:
4887 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4888 break;
4889
4890 case flow_dd:
4891 /* These are the most common dependences: don't print these. */
4892 fprintf (file, "%d -> %d \n", i, e->dest);
4893 break;
4894
4895 case anti_dd:
4896 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4897 break;
4898
4899 default:
4900 gcc_unreachable ();
4901 }
4902 }
4903
4904 fprintf (file, "}\n\n");
4905 }
4906
4907 /* Display the Reduced Dependence Graph using dotty. */
4908 extern void dot_rdg (struct graph *);
4909
4910 DEBUG_FUNCTION void
4911 dot_rdg (struct graph *rdg)
4912 {
4913 /* When debugging, enable the following code. This cannot be used
4914 in production compilers because it calls "system". */
4915 #if 0
4916 FILE *file = fopen ("/tmp/rdg.dot", "w");
4917 gcc_assert (file != NULL);
4918
4919 dot_rdg_1 (file, rdg);
4920 fclose (file);
4921
4922 system ("dotty /tmp/rdg.dot &");
4923 #else
4924 dot_rdg_1 (stderr, rdg);
4925 #endif
4926 }
4927
4928 /* This structure is used for recording the mapping statement index in
4929 the RDG. */
4930
4931 struct GTY(()) rdg_vertex_info
4932 {
4933 gimple stmt;
4934 int index;
4935 };
4936
4937 /* Returns the index of STMT in RDG. */
4938
4939 int
4940 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4941 {
4942 struct rdg_vertex_info rvi, *slot;
4943
4944 rvi.stmt = stmt;
4945 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4946
4947 if (!slot)
4948 return -1;
4949
4950 return slot->index;
4951 }
4952
4953 /* Creates an edge in RDG for each distance vector from DDR. The
4954 order that we keep track of in the RDG is the order in which
4955 statements have to be executed. */
4956
4957 static void
4958 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4959 {
4960 struct graph_edge *e;
4961 int va, vb;
4962 data_reference_p dra = DDR_A (ddr);
4963 data_reference_p drb = DDR_B (ddr);
4964 unsigned level = ddr_dependence_level (ddr);
4965
4966 /* For non scalar dependences, when the dependence is REVERSED,
4967 statement B has to be executed before statement A. */
4968 if (level > 0
4969 && !DDR_REVERSED_P (ddr))
4970 {
4971 data_reference_p tmp = dra;
4972 dra = drb;
4973 drb = tmp;
4974 }
4975
4976 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4977 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4978
4979 if (va < 0 || vb < 0)
4980 return;
4981
4982 e = add_edge (rdg, va, vb);
4983 e->data = XNEW (struct rdg_edge);
4984
4985 RDGE_LEVEL (e) = level;
4986 RDGE_RELATION (e) = ddr;
4987
4988 /* Determines the type of the data dependence. */
4989 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4990 RDGE_TYPE (e) = input_dd;
4991 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4992 RDGE_TYPE (e) = output_dd;
4993 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4994 RDGE_TYPE (e) = flow_dd;
4995 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4996 RDGE_TYPE (e) = anti_dd;
4997 }
4998
4999 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
5000 the index of DEF in RDG. */
5001
5002 static void
5003 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
5004 {
5005 use_operand_p imm_use_p;
5006 imm_use_iterator iterator;
5007
5008 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
5009 {
5010 struct graph_edge *e;
5011 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
5012
5013 if (use < 0)
5014 continue;
5015
5016 e = add_edge (rdg, idef, use);
5017 e->data = XNEW (struct rdg_edge);
5018 RDGE_TYPE (e) = flow_dd;
5019 RDGE_RELATION (e) = NULL;
5020 }
5021 }
5022
5023 /* Creates the edges of the reduced dependence graph RDG. */
5024
5025 static void
5026 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
5027 {
5028 int i;
5029 struct data_dependence_relation *ddr;
5030 def_operand_p def_p;
5031 ssa_op_iter iter;
5032
5033 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
5034 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5035 create_rdg_edge_for_ddr (rdg, ddr);
5036
5037 for (i = 0; i < rdg->n_vertices; i++)
5038 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5039 iter, SSA_OP_DEF)
5040 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5041 }
5042
5043 /* Build the vertices of the reduced dependence graph RDG. */
5044
5045 void
5046 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
5047 {
5048 int i, j;
5049 gimple stmt;
5050
5051 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
5052 {
5053 VEC (data_ref_loc, heap) *references;
5054 data_ref_loc *ref;
5055 struct vertex *v = &(rdg->vertices[i]);
5056 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
5057 struct rdg_vertex_info **slot;
5058
5059 rvi->stmt = stmt;
5060 rvi->index = i;
5061 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
5062
5063 if (!*slot)
5064 *slot = rvi;
5065 else
5066 free (rvi);
5067
5068 v->data = XNEW (struct rdg_vertex);
5069 RDG_STMT (rdg, i) = stmt;
5070
5071 RDG_MEM_WRITE_STMT (rdg, i) = false;
5072 RDG_MEM_READS_STMT (rdg, i) = false;
5073 if (gimple_code (stmt) == GIMPLE_PHI)
5074 continue;
5075
5076 get_references_in_stmt (stmt, &references);
5077 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
5078 if (!ref->is_read)
5079 RDG_MEM_WRITE_STMT (rdg, i) = true;
5080 else
5081 RDG_MEM_READS_STMT (rdg, i) = true;
5082
5083 VEC_free (data_ref_loc, heap, references);
5084 }
5085 }
5086
5087 /* Initialize STMTS with all the statements of LOOP. When
5088 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5089 which we discover statements is important as
5090 generate_loops_for_partition is using the same traversal for
5091 identifying statements. */
5092
5093 static void
5094 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5095 {
5096 unsigned int i;
5097 basic_block *bbs = get_loop_body_in_dom_order (loop);
5098
5099 for (i = 0; i < loop->num_nodes; i++)
5100 {
5101 basic_block bb = bbs[i];
5102 gimple_stmt_iterator bsi;
5103 gimple stmt;
5104
5105 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5106 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5107
5108 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5109 {
5110 stmt = gsi_stmt (bsi);
5111 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5112 VEC_safe_push (gimple, heap, *stmts, stmt);
5113 }
5114 }
5115
5116 free (bbs);
5117 }
5118
5119 /* Returns true when all the dependences are computable. */
5120
5121 static bool
5122 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5123 {
5124 ddr_p ddr;
5125 unsigned int i;
5126
5127 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5128 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5129 return false;
5130
5131 return true;
5132 }
5133
5134 /* Computes a hash function for element ELT. */
5135
5136 static hashval_t
5137 hash_stmt_vertex_info (const void *elt)
5138 {
5139 const struct rdg_vertex_info *const rvi =
5140 (const struct rdg_vertex_info *) elt;
5141 gimple stmt = rvi->stmt;
5142
5143 return htab_hash_pointer (stmt);
5144 }
5145
5146 /* Compares database elements E1 and E2. */
5147
5148 static int
5149 eq_stmt_vertex_info (const void *e1, const void *e2)
5150 {
5151 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5152 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5153
5154 return elt1->stmt == elt2->stmt;
5155 }
5156
5157 /* Free the element E. */
5158
5159 static void
5160 hash_stmt_vertex_del (void *e)
5161 {
5162 free (e);
5163 }
5164
5165 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5166 statement of the loop nest, and one edge per data dependence or
5167 scalar dependence. */
5168
5169 struct graph *
5170 build_empty_rdg (int n_stmts)
5171 {
5172 int nb_data_refs = 10;
5173 struct graph *rdg = new_graph (n_stmts);
5174
5175 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5176 eq_stmt_vertex_info, hash_stmt_vertex_del);
5177 return rdg;
5178 }
5179
5180 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5181 statement of the loop nest, and one edge per data dependence or
5182 scalar dependence. */
5183
5184 struct graph *
5185 build_rdg (struct loop *loop,
5186 VEC (loop_p, heap) **loop_nest,
5187 VEC (ddr_p, heap) **dependence_relations,
5188 VEC (data_reference_p, heap) **datarefs)
5189 {
5190 struct graph *rdg = NULL;
5191
5192 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5193 dependence_relations)
5194 && known_dependences_p (*dependence_relations))
5195 {
5196 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5197 stmts_from_loop (loop, &stmts);
5198 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5199 create_rdg_vertices (rdg, stmts);
5200 create_rdg_edges (rdg, *dependence_relations);
5201 VEC_free (gimple, heap, stmts);
5202 }
5203
5204 return rdg;
5205 }
5206
5207 /* Free the reduced dependence graph RDG. */
5208
5209 void
5210 free_rdg (struct graph *rdg)
5211 {
5212 int i;
5213
5214 for (i = 0; i < rdg->n_vertices; i++)
5215 {
5216 struct vertex *v = &(rdg->vertices[i]);
5217 struct graph_edge *e;
5218
5219 for (e = v->succ; e; e = e->succ_next)
5220 free (e->data);
5221
5222 free (v->data);
5223 }
5224
5225 htab_delete (rdg->indices);
5226 free_graph (rdg);
5227 }
5228
5229 /* Initialize STMTS with all the statements of LOOP that contain a
5230 store to memory. */
5231
5232 void
5233 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5234 {
5235 unsigned int i;
5236 basic_block *bbs = get_loop_body_in_dom_order (loop);
5237
5238 for (i = 0; i < loop->num_nodes; i++)
5239 {
5240 basic_block bb = bbs[i];
5241 gimple_stmt_iterator bsi;
5242
5243 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5244 if (gimple_vdef (gsi_stmt (bsi)))
5245 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5246 }
5247
5248 free (bbs);
5249 }
5250
5251 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5252 that contains a data reference on its LHS with a stride of the same
5253 size as its unit type. */
5254
5255 bool
5256 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5257 {
5258 tree op0, op1;
5259 bool res;
5260 struct data_reference *dr;
5261
5262 if (!stmt
5263 || !gimple_vdef (stmt)
5264 || !is_gimple_assign (stmt)
5265 || !gimple_assign_single_p (stmt)
5266 || !(op1 = gimple_assign_rhs1 (stmt))
5267 || !(integer_zerop (op1) || real_zerop (op1)))
5268 return false;
5269
5270 dr = XCNEW (struct data_reference);
5271 op0 = gimple_assign_lhs (stmt);
5272
5273 DR_STMT (dr) = stmt;
5274 DR_REF (dr) = op0;
5275
5276 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5277 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5278
5279 free_data_ref (dr);
5280 return res;
5281 }
5282
5283 /* Initialize STMTS with all the statements of LOOP that contain a
5284 store to memory of the form "A[i] = 0". */
5285
5286 void
5287 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5288 {
5289 unsigned int i;
5290 basic_block bb;
5291 gimple_stmt_iterator si;
5292 gimple stmt;
5293 basic_block *bbs = get_loop_body_in_dom_order (loop);
5294
5295 for (i = 0; i < loop->num_nodes; i++)
5296 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5297 if ((stmt = gsi_stmt (si))
5298 && stmt_with_adjacent_zero_store_dr_p (stmt))
5299 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5300
5301 free (bbs);
5302 }
5303
5304 /* For a data reference REF, return the declaration of its base
5305 address or NULL_TREE if the base is not determined. */
5306
5307 static inline tree
5308 ref_base_address (gimple stmt, data_ref_loc *ref)
5309 {
5310 tree base = NULL_TREE;
5311 tree base_address;
5312 struct data_reference *dr = XCNEW (struct data_reference);
5313
5314 DR_STMT (dr) = stmt;
5315 DR_REF (dr) = *ref->pos;
5316 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5317 base_address = DR_BASE_ADDRESS (dr);
5318
5319 if (!base_address)
5320 goto end;
5321
5322 switch (TREE_CODE (base_address))
5323 {
5324 case ADDR_EXPR:
5325 base = TREE_OPERAND (base_address, 0);
5326 break;
5327
5328 default:
5329 base = base_address;
5330 break;
5331 }
5332
5333 end:
5334 free_data_ref (dr);
5335 return base;
5336 }
5337
5338 /* Determines whether the statement from vertex V of the RDG has a
5339 definition used outside the loop that contains this statement. */
5340
5341 bool
5342 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5343 {
5344 gimple stmt = RDG_STMT (rdg, v);
5345 struct loop *loop = loop_containing_stmt (stmt);
5346 use_operand_p imm_use_p;
5347 imm_use_iterator iterator;
5348 ssa_op_iter it;
5349 def_operand_p def_p;
5350
5351 if (!loop)
5352 return true;
5353
5354 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5355 {
5356 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5357 {
5358 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5359 return true;
5360 }
5361 }
5362
5363 return false;
5364 }
5365
5366 /* Determines whether statements S1 and S2 access to similar memory
5367 locations. Two memory accesses are considered similar when they
5368 have the same base address declaration, i.e. when their
5369 ref_base_address is the same. */
5370
5371 bool
5372 have_similar_memory_accesses (gimple s1, gimple s2)
5373 {
5374 bool res = false;
5375 unsigned i, j;
5376 VEC (data_ref_loc, heap) *refs1, *refs2;
5377 data_ref_loc *ref1, *ref2;
5378
5379 get_references_in_stmt (s1, &refs1);
5380 get_references_in_stmt (s2, &refs2);
5381
5382 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5383 {
5384 tree base1 = ref_base_address (s1, ref1);
5385
5386 if (base1)
5387 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5388 if (base1 == ref_base_address (s2, ref2))
5389 {
5390 res = true;
5391 goto end;
5392 }
5393 }
5394
5395 end:
5396 VEC_free (data_ref_loc, heap, refs1);
5397 VEC_free (data_ref_loc, heap, refs2);
5398 return res;
5399 }
5400
5401 /* Helper function for the hashtab. */
5402
5403 static int
5404 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5405 {
5406 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5407 CONST_CAST_GIMPLE ((const_gimple) s2));
5408 }
5409
5410 /* Helper function for the hashtab. */
5411
5412 static hashval_t
5413 ref_base_address_1 (const void *s)
5414 {
5415 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5416 unsigned i;
5417 VEC (data_ref_loc, heap) *refs;
5418 data_ref_loc *ref;
5419 hashval_t res = 0;
5420
5421 get_references_in_stmt (stmt, &refs);
5422
5423 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5424 if (!ref->is_read)
5425 {
5426 res = htab_hash_pointer (ref_base_address (stmt, ref));
5427 break;
5428 }
5429
5430 VEC_free (data_ref_loc, heap, refs);
5431 return res;
5432 }
5433
5434 /* Try to remove duplicated write data references from STMTS. */
5435
5436 void
5437 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5438 {
5439 unsigned i;
5440 gimple stmt;
5441 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5442 have_similar_memory_accesses_1, NULL);
5443
5444 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5445 {
5446 void **slot;
5447
5448 slot = htab_find_slot (seen, stmt, INSERT);
5449
5450 if (*slot)
5451 VEC_ordered_remove (gimple, *stmts, i);
5452 else
5453 {
5454 *slot = (void *) stmt;
5455 i++;
5456 }
5457 }
5458
5459 htab_delete (seen);
5460 }
5461