]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
Pool alignment information for common bases
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98
/* Counters accumulated during dependence analysis; dumped by the
   statistics machinery when detailed dumping is enabled.  */

static struct datadep_stats
{
  /* Whole-relation dependence tests and their outcomes.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript tests.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variable) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
125
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 unsigned int, unsigned int,
128 struct loop *);
/* Returns true iff A divides B.  Both A and B must be INTEGER_CST
   nodes; the test is B mod A == 0.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
138
/* Returns true iff A divides B, i.e. B is an exact multiple of A.
   A must be nonzero.  */

static inline bool
int_divides_p (int a, int b)
{
  return b % a == 0;
}
146
147 /* Return true if reference REF contains a union access. */
148
149 static bool
150 ref_contains_union_access_p (tree ref)
151 {
152 while (handled_component_p (ref))
153 {
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
158 }
159 return false;
160 }
161
162 \f
163
164 /* Dump into FILE all the data references from DATAREFS. */
165
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
168 {
169 unsigned int i;
170 struct data_reference *dr;
171
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
174 }
175
/* Unified dump to stderr of all the data references in REF; overload
   intended to be callable from a debugger.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
183
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
186 {
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
191 }
192
193
/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
201
/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
209
210 /* Dump function for a DATA_REFERENCE structure. */
211
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
215 {
216 unsigned int i;
217
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
226
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
228 {
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
231 }
232 fprintf (outf, "#)\n");
233 }
234
/* Unified dump to stderr of a single data_reference REF; overload
   intended to be callable from a debugger.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
242
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
245 {
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
250 }
251
252
253 /* Dumps the affine function described by FN to the file OUTF. */
254
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
257 {
258 unsigned i;
259 tree coef;
260
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
263 {
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
267 }
268 }
269
270 /* Dumps the conflict function CF to the file OUTF. */
271
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
274 {
275 unsigned i;
276
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
282 {
283 for (i = 0; i < cf->n; i++)
284 {
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
290 }
291 }
292 }
293
294 /* Dump function for a SUBSCRIPT structure. */
295
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
298 {
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
300
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
305 {
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
309 }
310
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
315 {
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
319 }
320
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
324 }
325
326 /* Print the classic direction vector DIRV to OUTF. */
327
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
332 {
333 int eq;
334
335 for (eq = 0; eq < length; eq++)
336 {
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
339
340 switch (dir)
341 {
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
366 }
367 }
368 fprintf (outf, "\n");
369 }
370
371 /* Print a vector of direction vectors. */
372
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
376 {
377 unsigned j;
378 lambda_vector v;
379
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
382 }
383
384 /* Print out a vector VEC of length N to OUTFILE. */
385
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
388 {
389 int i;
390
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
394 }
395
396 /* Print a vector of distance vectors. */
397
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
401 {
402 unsigned j;
403 lambda_vector v;
404
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
407 }
408
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Prints both
   data references, then either "don't know" / "no dependence", or the
   full subscript, loop-nest, distance- and direction-vector detail when
   the relation is affine (DDR_ARE_DEPENDENT == NULL_TREE).  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* A missing relation or an undetermined one ("don't know") is dumped
     with whatever data references are present, then we are done.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      struct loop *loopi;

      /* One entry per dimension: the pair of access functions and the
	 computed subscript information.  */
      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
484
/* Debug version: dump DDR to stderr.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
492
493 /* Dump into FILE all the dependence relations from DDRS. */
494
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
498 {
499 unsigned int i;
500 struct data_dependence_relation *ddr;
501
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
504 }
505
/* Unified dump to stderr of all the dependence relations in REF;
   overload intended to be callable from a debugger.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
511
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
514 {
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
519 }
520
521
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
529
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the size of each dependence vector is the
   number of loops in the considered nest (DDR_NB_LOOPS).  */
534
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
537 {
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
541
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
544 {
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
546 {
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
550 }
551
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
553 {
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
557 }
558 }
559
560 fprintf (file, "\n\n");
561 }
562
563 /* Dumps the data dependence relations DDRS in FILE. */
564
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
567 {
568 unsigned int i;
569 struct data_dependence_relation *ddr;
570
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
573
574 fprintf (file, "\n\n");
575 }
576
/* Debug version of dump_ddrs: dump DDRS to stderr.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
582
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.  Recurses through split_constant_offset on the operands
   of handled expressions.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A bare constant: zero variable part, the constant is the offset.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* For the offset computation a pointer addition behaves like
	 PLUS_EXPR.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Split both operands and recombine: variable parts with CODE,
	 constant parts with OCODE.  */
      split_constant_offset (op0, &var0, &off0);
      split_constant_offset (op1, &var1, &off1);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplications by a constant factor are handled; the
	 factor scales both the variable and constant parts.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	HOST_WIDE_INT pbitsize, pbitpos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	/* Decompose the address of a memory reference into a base plus
	   bit position and variable offset.  */
	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on bitfield positions that are not byte-aligned.  */
	if (pbitpos % BITS_PER_UNIT != 0)
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbitpos / BITS_PER_UNIT);

	if (poffset)
	  {
	    /* Fold the constant part of the variable offset into OFF0
	       and re-add the remaining variable part to the base.  */
	    split_constant_offset (poffset, &poffset, &off1);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	/* Look through the defining assignment and retry on its RHS.  */
	var0 = gimple_assign_rhs1 (def_stmt);
	subcode = gimple_assign_rhs_code (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	return split_constant_offset_1 (type, var0, subcode, var1, var, off);
      }
    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    split_constant_offset (op0, &var0, off);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
719
/* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
   will be ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
  enum tree_code code;

  /* Default decomposition: everything is the variable part, zero
     offset; kept unless the helper succeeds.  */
  *var = exp;
  *off = ssize_int (0);
  STRIP_NOPS (exp);

  /* Chrecs and ternary RHSs are not handled by split_constant_offset_1;
     leave the default decomposition in place.  */
  if (tree_is_chrec (exp)
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
    return;

  otype = TREE_TYPE (exp);
  code = TREE_CODE (exp);
  extract_ops_from_tree (exp, &code, &op0, &op1);
  if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
    {
      /* Convert back to EXP's original type (STRIP_NOPS may have
	 changed it).  */
      *var = fold_convert (type, e);
      *off = o;
    }
}
746
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
749
750 static tree
751 canonicalize_base_object_address (tree addr)
752 {
753 tree orig = addr;
754
755 STRIP_NOPS (addr);
756
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
761
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
764
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
766 }
767
768 /* Analyze the behavior of memory reference REF. There are two modes:
769
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
775
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
781
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
784
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
787
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
791 {
792 HOST_WIDE_INT pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
799
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
802
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
806
807 if (pbitpos % BITS_PER_UNIT != 0)
808 {
809 if (dump_file && (dump_flags & TDF_DETAILS))
810 fprintf (dump_file, "failed: bit offset alignment.\n");
811 return false;
812 }
813
814 if (preversep)
815 {
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "failed: reverse storage order.\n");
818 return false;
819 }
820
821 /* Calculate the alignment and misalignment for the inner reference. */
822 unsigned int HOST_WIDE_INT base_misalignment;
823 unsigned int base_alignment;
824 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
825
826 /* There are no bitfield references remaining in BASE, so the values
827 we got back must be whole bytes. */
828 gcc_assert (base_alignment % BITS_PER_UNIT == 0
829 && base_misalignment % BITS_PER_UNIT == 0);
830 base_alignment /= BITS_PER_UNIT;
831 base_misalignment /= BITS_PER_UNIT;
832
833 if (TREE_CODE (base) == MEM_REF)
834 {
835 if (!integer_zerop (TREE_OPERAND (base, 1)))
836 {
837 /* Subtract MOFF from the base and add it to POFFSET instead.
838 Adjust the misalignment to reflect the amount we subtracted. */
839 offset_int moff = mem_ref_offset (base);
840 base_misalignment -= moff.to_short_addr ();
841 tree mofft = wide_int_to_tree (sizetype, moff);
842 if (!poffset)
843 poffset = mofft;
844 else
845 poffset = size_binop (PLUS_EXPR, poffset, mofft);
846 }
847 base = TREE_OPERAND (base, 0);
848 }
849 else
850 base = build_fold_addr_expr (base);
851
852 if (in_loop)
853 {
854 if (!simple_iv (loop, loop, base, &base_iv, true))
855 {
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, "failed: evolution of base is not affine.\n");
858 return false;
859 }
860 }
861 else
862 {
863 base_iv.base = base;
864 base_iv.step = ssize_int (0);
865 base_iv.no_overflow = true;
866 }
867
868 if (!poffset)
869 {
870 offset_iv.base = ssize_int (0);
871 offset_iv.step = ssize_int (0);
872 }
873 else
874 {
875 if (!in_loop)
876 {
877 offset_iv.base = poffset;
878 offset_iv.step = ssize_int (0);
879 }
880 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
881 {
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
884 return false;
885 }
886 }
887
888 init = ssize_int (pbitpos / BITS_PER_UNIT);
889
890 /* Subtract any constant component from the base and add it to INIT instead.
891 Adjust the misalignment to reflect the amount we subtracted. */
892 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
893 init = size_binop (PLUS_EXPR, init, dinit);
894 base_misalignment -= TREE_INT_CST_LOW (dinit);
895
896 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
897 init = size_binop (PLUS_EXPR, init, dinit);
898
899 step = size_binop (PLUS_EXPR,
900 fold_convert (ssizetype, base_iv.step),
901 fold_convert (ssizetype, offset_iv.step));
902
903 base = canonicalize_base_object_address (base_iv.base);
904
905 /* See if get_pointer_alignment can guarantee a higher alignment than
906 the one we calculated above. */
907 unsigned int HOST_WIDE_INT alt_misalignment;
908 unsigned int alt_alignment;
909 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
910
911 /* As above, these values must be whole bytes. */
912 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
913 && alt_misalignment % BITS_PER_UNIT == 0);
914 alt_alignment /= BITS_PER_UNIT;
915 alt_misalignment /= BITS_PER_UNIT;
916
917 if (base_alignment < alt_alignment)
918 {
919 base_alignment = alt_alignment;
920 base_misalignment = alt_misalignment;
921 }
922
923 drb->base_address = base;
924 drb->offset = fold_convert (ssizetype, offset_iv.base);
925 drb->init = init;
926 drb->step = step;
927 drb->base_alignment = base_alignment;
928 drb->base_misalignment = base_misalignment & (base_alignment - 1);
929 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
930 drb->step_alignment = highest_pow2_factor (step);
931
932 if (dump_file && (dump_flags & TDF_DETAILS))
933 fprintf (dump_file, "success.\n");
934
935 return true;
936 }
937
938 /* Return true if OP is a valid component reference for a DR access
939 function. This accepts a subset of what handled_component_p accepts. */
940
941 static bool
942 access_fn_component_p (tree op)
943 {
944 switch (TREE_CODE (op))
945 {
946 case REALPART_EXPR:
947 case IMAGPART_EXPR:
948 case ARRAY_REF:
949 return true;
950
951 case COMPONENT_REF:
952 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
953
954 default:
955 return false;
956 }
957 }
958
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated in loop nest NEST.  Stores the
   results in DR_BASE_OBJECT and DR_ACCESS_FNS of DR.  */

static void
dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;
  basic_block before_loop;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);
  before_loop = block_before_loop (nest);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The array index, analyzed as a scalar evolution in LOOP and
	     instantiated relative to the nest, is the access function.  */
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (before_loop, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  /* NOTE(review): this inner OFF shadows the function-scope OFF
	     declared above; harmless, but worth renaming.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (before_loop, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = off;
	  off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
1101
1102 /* Extracts the alias analysis information from the memory reference DR. */
1103
1104 static void
1105 dr_analyze_alias (struct data_reference *dr)
1106 {
1107 tree ref = DR_REF (dr);
1108 tree base = get_base_address (ref), addr;
1109
1110 if (INDIRECT_REF_P (base)
1111 || TREE_CODE (base) == MEM_REF)
1112 {
1113 addr = TREE_OPERAND (base, 0);
1114 if (TREE_CODE (addr) == SSA_NAME)
1115 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1116 }
1117 }
1118
/* Frees data reference DR.

   Releases the access-function vector first, then the data_reference
   structure itself (allocated with XCNEW in create_data_ref).  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}
1127
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so any field not assigned below starts
     out cleared.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* Decompose the reference into base/offset/init/step (plus alignment
     info), access functions and alias info.  Only analyze with respect
     to LOOP when we are instantiating in a loop nest.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1191
1192 /* A helper function computes order between two tree epxressions T1 and T2.
1193 This is used in comparator functions sorting objects based on the order
1194 of tree expressions. The function returns -1, 0, or 1. */
1195
1196 int
1197 data_ref_compare_tree (tree t1, tree t2)
1198 {
1199 int i, cmp;
1200 enum tree_code code;
1201 char tclass;
1202
1203 if (t1 == t2)
1204 return 0;
1205 if (t1 == NULL)
1206 return -1;
1207 if (t2 == NULL)
1208 return 1;
1209
1210 STRIP_NOPS (t1);
1211 STRIP_NOPS (t2);
1212
1213 if (TREE_CODE (t1) != TREE_CODE (t2))
1214 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1215
1216 code = TREE_CODE (t1);
1217 switch (code)
1218 {
1219 /* For const values, we can just use hash values for comparisons. */
1220 case INTEGER_CST:
1221 case REAL_CST:
1222 case FIXED_CST:
1223 case STRING_CST:
1224 case COMPLEX_CST:
1225 case VECTOR_CST:
1226 {
1227 hashval_t h1 = iterative_hash_expr (t1, 0);
1228 hashval_t h2 = iterative_hash_expr (t2, 0);
1229 if (h1 != h2)
1230 return h1 < h2 ? -1 : 1;
1231 break;
1232 }
1233
1234 case SSA_NAME:
1235 cmp = data_ref_compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
1236 if (cmp != 0)
1237 return cmp;
1238
1239 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1240 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1241 break;
1242
1243 default:
1244 tclass = TREE_CODE_CLASS (code);
1245
1246 /* For var-decl, we could compare their UIDs. */
1247 if (tclass == tcc_declaration)
1248 {
1249 if (DECL_UID (t1) != DECL_UID (t2))
1250 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1251 break;
1252 }
1253
1254 /* For expressions with operands, compare their operands recursively. */
1255 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1256 {
1257 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1258 TREE_OPERAND (t2, i));
1259 if (cmp != 0)
1260 return cmp;
1261 }
1262 }
1263
1264 return 0;
1265 }
1266
/* Return TRUE if it's possible to resolve data dependence DDR by a
   runtime alias check.  LOOP is the loop being considered (may be NULL);
   SPEED_P is true when optimizing for speed rather than size.  */

bool
runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
      dump_printf (MSG_NOTE, "\n");
    }

  /* Versioning duplicates the loop body, so it is only worthwhile when
     optimizing for speed.  */
  if (!speed_p)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported when optimizing "
		     "for size.\n");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported for outer loop.\n");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported for non-constant "
		     "step\n");
      return false;
    }

  return true;
}
1315
1316 /* Operator == between two dr_with_seg_len objects.
1317
1318 This equality operator is used to make sure two data refs
1319 are the same one so that we will consider to combine the
1320 aliasing checks of those two pairs of data dependent data
1321 refs. */
1322
1323 static bool
1324 operator == (const dr_with_seg_len& d1,
1325 const dr_with_seg_len& d2)
1326 {
1327 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1328 DR_BASE_ADDRESS (d2.dr), 0)
1329 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1330 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1331 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1332 }
1333
1334 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1335 so that we can combine aliasing checks in one scan. */
1336
1337 static int
1338 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1339 {
1340 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1341 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1342 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1343 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1344
1345 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1346 if a and c have the same basic address snd step, and b and d have the same
1347 address and step. Therefore, if any a&c or b&d don't have the same address
1348 and step, we don't care the order of those two pairs after sorting. */
1349 int comp_res;
1350
1351 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1352 DR_BASE_ADDRESS (b1.dr))) != 0)
1353 return comp_res;
1354 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1355 DR_BASE_ADDRESS (b2.dr))) != 0)
1356 return comp_res;
1357 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1358 DR_STEP (b1.dr))) != 0)
1359 return comp_res;
1360 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1361 DR_STEP (b2.dr))) != 0)
1362 return comp_res;
1363 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1364 DR_OFFSET (b1.dr))) != 0)
1365 return comp_res;
1366 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1367 DR_INIT (b1.dr))) != 0)
1368 return comp_res;
1369 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1370 DR_OFFSET (b2.dr))) != 0)
1371 return comp_res;
1372 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1373 DR_INIT (b2.dr))) != 0)
1374 return comp_res;
1375
1376 return 0;
1377 }
1378
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       unsigned HOST_WIDE_INT factor)
{
  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  for (size_t i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
		      *dr_b1 = &(*alias_pairs)[i-1].second,
		      *dr_a2 = &(*alias_pairs)[i].first,
		      *dr_b2 = &(*alias_pairs)[i].second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf (MSG_NOTE, "found equal ranges ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
	      dump_printf (MSG_NOTE, ", ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
	      dump_printf (MSG_NOTE, " and ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
	      dump_printf (MSG_NOTE, ", ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
	      dump_printf (MSG_NOTE, "\n");
	    }
	  /* ordered_remove shifts the tail down, so step back one to
	     re-examine the new element at this index.  */
	  alias_pairs->ordered_remove (i--);
	  continue;
	}

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
	      || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
	    continue;

	  /* Only merge const step data references.  */
	  if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
	      || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
	    continue;

	  /* DR_A1 and DR_A2 must go in the same direction.  */
	  if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
	      != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
	    continue;

	  bool neg_step
	    = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);

	  /* We need to compute merged segment length at compilation time for
	     dr_a1 and dr_a2, which is impossible if either one has non-const
	     segment length.  */
	  if ((!tree_fits_uhwi_p (dr_a1->seg_len)
	       || !tree_fits_uhwi_p (dr_a2->seg_len))
	      && tree_int_cst_compare (DR_STEP (dr_a1->dr),
				       DR_STEP (dr_a2->dr)) != 0)
	    continue;

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
	    std::swap (*dr_a1, *dr_a2);

	  bool do_remove = false;
	  wide_int diff = wi::sub (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr));
	  wide_int min_seg_len_b;
	  tree new_seg_len;

	  /* Estimate the smallest possible segment length of B: either its
	     constant value, or FACTOR iterations of its step.  */
	  if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
	    min_seg_len_b = wi::abs (dr_b1->seg_len);
	  else
	    min_seg_len_b = wi::mul (factor, wi::abs (DR_STEP (dr_b1->dr)));

	  /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.

	     Case A:
	       check if the following condition is satisfied:

	       DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B

	       where DIFF = DR_A2_INIT - DR_A1_INIT.  However,
	       SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
	       have to make a best estimation.  We can get the minimum value
	       of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
	       then either of the following two conditions can guarantee the
	       one above:

	       1: DIFF <= MIN_SEG_LEN_B
	       2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
		  Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
		  to take care of wrapping behavior in it.

	     Case B:
	       If the left segment does not extend beyond the start of the
	       right segment the new segment length is that of the right
	       plus the segment distance.  The condition is like:

	       DIFF >= SEGMENT_LENGTH_A   ;SEGMENT_LENGTH_A is a constant.

	     Note 1: Case A.2 and B combined together effectively merges every
	     dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.

	     Note 2: Above description is based on positive DR_STEP, we need to
	     take care of negative DR_STEP for wrapping behavior.  See PR80815
	     for more information.  */
	  if (neg_step)
	    {
	      /* Adjust diff according to access size of both references.  */
	      tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
	      tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
	      diff = wi::add (diff, wi::sub (size_a2, size_a1));
	      /* Case A.1.  */
	      if (wi::leu_p (diff, min_seg_len_b)
		  /* Case A.2 and B combined.  */
		  || (tree_fits_uhwi_p (dr_a2->seg_len)))
		{
		  if (tree_fits_uhwi_p (dr_a1->seg_len)
		      && tree_fits_uhwi_p (dr_a2->seg_len))
		    new_seg_len
		      = wide_int_to_tree (sizetype,
					  wi::umin (wi::sub (dr_a1->seg_len,
							     diff),
						    dr_a2->seg_len));
		  else
		    new_seg_len
		      = size_binop (MINUS_EXPR, dr_a2->seg_len,
				    wide_int_to_tree (sizetype, diff));

		  dr_a2->seg_len = new_seg_len;
		  do_remove = true;
		}
	    }
	  else
	    {
	      /* Case A.1.  */
	      if (wi::leu_p (diff, min_seg_len_b)
		  /* Case A.2 and B combined.  */
		  || (tree_fits_uhwi_p (dr_a1->seg_len)))
		{
		  if (tree_fits_uhwi_p (dr_a1->seg_len)
		      && tree_fits_uhwi_p (dr_a2->seg_len))
		    new_seg_len
		      = wide_int_to_tree (sizetype,
					  wi::umax (wi::add (dr_a2->seg_len,
							     diff),
						    dr_a1->seg_len));
		  else
		    new_seg_len
		      = size_binop (PLUS_EXPR, dr_a2->seg_len,
				    wide_int_to_tree (sizetype, diff));

		  dr_a1->seg_len = new_seg_len;
		  do_remove = true;
		}
	    }

	  if (do_remove)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf (MSG_NOTE, "merging ranges for ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
		  dump_printf (MSG_NOTE, ", ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
		  dump_printf (MSG_NOTE, " and ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
		  dump_printf (MSG_NOTE, ", ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
		  dump_printf (MSG_NOTE, "\n");
		}
	      /* For negative step the merged range lives in the second
		 entry, so drop the first; otherwise drop the second.  */
	      alias_pairs->ordered_remove (neg_step ? i - 1 : i);
	      i--;
	    }
	}
    }
}
1603
/* Given LOOP's two data references and segment lengths described by DR_A
   and DR_B, create expression checking if the two addresses ranges intersect
   with each other based on index of the two addresses.  This can only be
   done if DR_A and DR_B referring to the same (array) object and the index
   is the only difference.  For example:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (i_0 + 4 < j_0 || j_0 + 4 < i_0)

   Note evolution step of index needs to be considered in comparison.  */

static bool
create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
				     const dr_with_seg_len& dr_a,
				     const dr_with_seg_len& dr_b)
{
  /* Both references must actually move and must have the same number of
     access-function dimensions.  */
  if (integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  /* The index-based check is only valid when both refs access the same
     base object with the same step.  */
  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step
    = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));

  unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
  unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same amount iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
  unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;

  unsigned int i;
  for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      /* The two indices must have the same step.  */
      if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
	return false;

      tree idx_step = CHREC_RIGHT (access1);
      /* Index must have const step, otherwise DR_STEP won't be constant.  */
      gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
      /* Index must evaluate in the same direction as DR.  */
      gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

      tree min1 = CHREC_LEFT (access1);
      tree min2 = CHREC_LEFT (access2);
      if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
	return false;

      /* Ideally, alias can be checked against loop's control IV, but we
	 need to prove linear mapping between control IV and reference
	 index.  Although that should be true, we check against (array)
	 index of data reference.  Like segment length, index length is
	 linear function of the number of iterations with index_step as
	 the coefficient, i.e, niter_len * idx_step.  */
      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
				   build_int_cst (TREE_TYPE (min1),
						  niter_len1));
      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
				   build_int_cst (TREE_TYPE (min2),
						  niter_len2));
      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
      /* Adjust ranges for negative step.  */
      if (neg_step)
	{
	  min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
			      CHREC_LEFT (access1), idx_step);
	  min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
			      CHREC_LEFT (access2), idx_step);
	}
      /* The ranges are disjoint when one ends at or before the other
	 begins; AND this dimension's condition into the result.  */
      tree part_cond_expr
	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  return true;
}
1736
/* Given two data references and segment lengths described by DR_A and DR_B,
   create expression checking if the two addresses ranges intersect with
   each other:

     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
     || (DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0))  */

static void
create_intersect_range_checks (struct loop *loop, tree *cond_expr,
			       const dr_with_seg_len& dr_a,
			       const dr_with_seg_len& dr_b)
{
  *cond_expr = NULL_TREE;
  /* Prefer the index-based check when it applies; it avoids the address
     computations below.  */
  if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
    return;

  tree segment_length_a = dr_a.seg_len;
  tree segment_length_b = dr_b.seg_len;
  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);

  /* Fold DR_INIT into the variable offset and add the sum to the base
     address, giving the start of each segment.  */
  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
			  offset_a, DR_INIT (dr_a.dr));
  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
			  offset_b, DR_INIT (dr_b.dr));
  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);

  tree seg_a_min = addr_base_a;
  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12).  */
  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
    }

  tree seg_b_min = addr_base_b;
  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
    }
  /* The two segments don't intersect when one ends at or before the
     other begins.  */
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
}
1791
1792 /* Create a conditional expression that represents the run-time checks for
1793 overlapping of address ranges represented by a list of data references
1794 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1795 COND_EXPR is the conditional expression to be used in the if statement
1796 that controls which version of the loop gets executed at runtime. */
1797
1798 void
1799 create_runtime_alias_checks (struct loop *loop,
1800 vec<dr_with_seg_len_pair_t> *alias_pairs,
1801 tree * cond_expr)
1802 {
1803 tree part_cond_expr;
1804
1805 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1806 {
1807 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1808 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1809
1810 if (dump_enabled_p ())
1811 {
1812 dump_printf (MSG_NOTE, "create runtime check for data references ");
1813 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1814 dump_printf (MSG_NOTE, " and ");
1815 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1816 dump_printf (MSG_NOTE, "\n");
1817 }
1818
1819 /* Create condition expression for each pair data references. */
1820 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1821 if (*cond_expr)
1822 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1823 *cond_expr, part_cond_expr);
1824 else
1825 *cond_expr = part_cond_expr;
1826 }
1827 }
1828
1829 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1830 expressions. */
1831 static bool
1832 dr_equal_offsets_p1 (tree offset1, tree offset2)
1833 {
1834 bool res;
1835
1836 STRIP_NOPS (offset1);
1837 STRIP_NOPS (offset2);
1838
1839 if (offset1 == offset2)
1840 return true;
1841
1842 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1843 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1844 return false;
1845
1846 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1847 TREE_OPERAND (offset2, 0));
1848
1849 if (!res || !BINARY_CLASS_P (offset1))
1850 return res;
1851
1852 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1853 TREE_OPERAND (offset2, 1));
1854
1855 return res;
1856 }
1857
1858 /* Check if DRA and DRB have equal offsets. */
1859 bool
1860 dr_equal_offsets_p (struct data_reference *dra,
1861 struct data_reference *drb)
1862 {
1863 tree offset1, offset2;
1864
1865 offset1 = DR_OFFSET (dra);
1866 offset2 = DR_OFFSET (drb);
1867
1868 return dr_equal_offsets_p1 (offset1, offset2);
1869 }
1870
1871 /* Returns true if FNA == FNB. */
1872
1873 static bool
1874 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1875 {
1876 unsigned i, n = fna.length ();
1877
1878 if (n != fnb.length ())
1879 return false;
1880
1881 for (i = 0; i < n; i++)
1882 if (!operand_equal_p (fna[i], fnb[i], 0))
1883 return false;
1884
1885 return true;
1886 }
1887
1888 /* If all the functions in CF are the same, returns one of them,
1889 otherwise returns NULL. */
1890
1891 static affine_fn
1892 common_affine_function (conflict_function *cf)
1893 {
1894 unsigned i;
1895 affine_fn comm;
1896
1897 if (!CF_NONTRIVIAL_P (cf))
1898 return affine_fn ();
1899
1900 comm = cf->fns[0];
1901
1902 for (i = 1; i < cf->n; i++)
1903 if (!affine_function_equal_p (comm, cf->fns[i]))
1904 return affine_fn ();
1905
1906 return comm;
1907 }
1908
/* Returns the base of the affine function FN, i.e. its constant
   coefficient at index 0.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
1916
1917 /* Returns true if FN is a constant. */
1918
1919 static bool
1920 affine_function_constant_p (affine_fn fn)
1921 {
1922 unsigned i;
1923 tree coef;
1924
1925 for (i = 1; fn.iterate (i, &coef); i++)
1926 if (!integer_zerop (coef))
1927 return false;
1928
1929 return true;
1930 }
1931
1932 /* Returns true if FN is the zero constant function. */
1933
1934 static bool
1935 affine_function_zero_p (affine_fn fn)
1936 {
1937 return (integer_zerop (affine_function_base (fn))
1938 && affine_function_constant_p (fn));
1939 }
1940
1941 /* Returns a signed integer type with the largest precision from TA
1942 and TB. */
1943
1944 static tree
1945 signed_type_for_types (tree ta, tree tb)
1946 {
1947 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1948 return signed_type_for (ta);
1949 else
1950 return signed_type_for (tb);
1951 }
1952
1953 /* Applies operation OP on affine functions FNA and FNB, and returns the
1954 result. */
1955
1956 static affine_fn
1957 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1958 {
1959 unsigned i, n, m;
1960 affine_fn ret;
1961 tree coef;
1962
1963 if (fnb.length () > fna.length ())
1964 {
1965 n = fna.length ();
1966 m = fnb.length ();
1967 }
1968 else
1969 {
1970 n = fnb.length ();
1971 m = fna.length ();
1972 }
1973
1974 ret.create (m);
1975 for (i = 0; i < n; i++)
1976 {
1977 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1978 TREE_TYPE (fnb[i]));
1979 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1980 }
1981
1982 for (; fna.iterate (i, &coef); i++)
1983 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1984 coef, integer_zero_node));
1985 for (; fnb.iterate (i, &coef); i++)
1986 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1987 integer_zero_node, coef));
1988
1989 return ret;
1990 }
1991
/* Returns the sum of affine functions FNA and FNB, as a freshly
   allocated affine_fn (caller must free it with affine_fn_free).  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
1999
/* Returns the difference of affine functions FNA and FNB, as a freshly
   allocated affine_fn (caller must free it with affine_fn_free).  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2007
/* Frees affine function FN by releasing its coefficient vector.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2015
2016 /* Determine for each subscript in the data dependence relation DDR
2017 the distance. */
2018
2019 static void
2020 compute_subscript_distance (struct data_dependence_relation *ddr)
2021 {
2022 conflict_function *cf_a, *cf_b;
2023 affine_fn fn_a, fn_b, diff;
2024
2025 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2026 {
2027 unsigned int i;
2028
2029 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2030 {
2031 struct subscript *subscript;
2032
2033 subscript = DDR_SUBSCRIPT (ddr, i);
2034 cf_a = SUB_CONFLICTS_IN_A (subscript);
2035 cf_b = SUB_CONFLICTS_IN_B (subscript);
2036
2037 fn_a = common_affine_function (cf_a);
2038 fn_b = common_affine_function (cf_b);
2039 if (!fn_a.exists () || !fn_b.exists ())
2040 {
2041 SUB_DISTANCE (subscript) = chrec_dont_know;
2042 return;
2043 }
2044 diff = affine_fn_minus (fn_a, fn_b);
2045
2046 if (affine_function_constant_p (diff))
2047 SUB_DISTANCE (subscript) = affine_function_base (diff);
2048 else
2049 SUB_DISTANCE (subscript) = chrec_dont_know;
2050
2051 affine_fn_free (diff);
2052 }
2053 }
2054 }
2055
2056 /* Returns the conflict function for "unknown". */
2057
2058 static conflict_function *
2059 conflict_fn_not_known (void)
2060 {
2061 conflict_function *fn = XCNEW (conflict_function);
2062 fn->n = NOT_KNOWN;
2063
2064 return fn;
2065 }
2066
2067 /* Returns the conflict function for "independent". */
2068
2069 static conflict_function *
2070 conflict_fn_no_dependence (void)
2071 {
2072 conflict_function *fn = XCNEW (conflict_function);
2073 fn->n = NO_DEPENDENCE;
2074
2075 return fn;
2076 }
2077
/* Returns true if the address of OBJ is invariant in LOOP.  Walks the
   handled components of OBJ down to its base; any component whose
   stride, offset or field offset is defined inside LOOP makes the
   address variant.  */

static bool
object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
	     need to check the stride and the lower bound of the reference.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num)
	      || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
							 loop->num))
	    return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  /* Operand 2 is the (optional) field offset expression.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  /* Declarations and other non-pointer bases have a fixed address.  */
  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  /* For pointer-based bases, the address is invariant iff the pointer
     itself is not defined in LOOP.  */
  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
2111
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		bool loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and check the accessed ranges cannot
	 overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified pointers: same clique but different bases
     cannot alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Use the dependence kind
     (write/write, read/write, other) to pick the strongest oracle.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2193
2194 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2195 if it is meaningful to compare their associated access functions
2196 when checking for dependencies. */
2197
2198 static bool
2199 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2200 {
2201 /* Allow pairs of component refs from the following sets:
2202
2203 { REALPART_EXPR, IMAGPART_EXPR }
2204 { COMPONENT_REF }
2205 { ARRAY_REF }. */
2206 tree_code code_a = TREE_CODE (ref_a);
2207 tree_code code_b = TREE_CODE (ref_b);
2208 if (code_a == IMAGPART_EXPR)
2209 code_a = REALPART_EXPR;
2210 if (code_b == IMAGPART_EXPR)
2211 code_b = REALPART_EXPR;
2212 if (code_a != code_b)
2213 return false;
2214
2215 if (TREE_CODE (ref_a) == COMPONENT_REF)
2216 /* ??? We cannot simply use the type of operand #0 of the refs here as
2217 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2218 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2219 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2220 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2221
2222 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2223 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2224 }
2225
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  Returns a freshly
   allocated relation; ownership passes to the caller.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
 				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* Missing references: nothing can be said.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists ()))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Advance the reference whose enclosing type is smaller (or both
	 on a tie), so the walks stay roughly aligned in object size.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per comparable access-function pair, all
     initially in the "don't know" state.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2548
2549 /* Frees memory used by the conflict function F. */
2550
2551 static void
2552 free_conflict_function (conflict_function *f)
2553 {
2554 unsigned i;
2555
2556 if (CF_NONTRIVIAL_P (f))
2557 {
2558 for (i = 0; i < f->n; i++)
2559 affine_fn_free (f->fns[i]);
2560 }
2561 free (f);
2562 }
2563
2564 /* Frees memory used by SUBSCRIPTS. */
2565
2566 static void
2567 free_subscripts (vec<subscript_p> subscripts)
2568 {
2569 unsigned i;
2570 subscript_p s;
2571
2572 FOR_EACH_VEC_ELT (subscripts, i, s)
2573 {
2574 free_conflict_function (s->conflicting_iterations_in_a);
2575 free_conflict_function (s->conflicting_iterations_in_b);
2576 free (s);
2577 }
2578 subscripts.release ();
2579 }
2580
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  The subscripts are freed since the relation is now
   decided and they are no longer needed.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
2592
/* The dependence relation DDR cannot be represented by a distance
   vector: mark it non-affine (and say so in the dump file).  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
2604
2605 \f
2606
2607 /* This section contains the classic Banerjee tests. */
2608
2609 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2610 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2611
2612 static inline bool
2613 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2614 {
2615 return (evolution_function_is_constant_p (chrec_a)
2616 && evolution_function_is_constant_p (chrec_b));
2617 }
2618
2619 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2620 variable, i.e., if the SIV (Single Index Variable) test is true. */
2621
2622 static bool
2623 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2624 {
2625 if ((evolution_function_is_constant_p (chrec_a)
2626 && evolution_function_is_univariate_p (chrec_b))
2627 || (evolution_function_is_constant_p (chrec_b)
2628 && evolution_function_is_univariate_p (chrec_a)))
2629 return true;
2630
2631 if (evolution_function_is_univariate_p (chrec_a)
2632 && evolution_function_is_univariate_p (chrec_b))
2633 {
2634 switch (TREE_CODE (chrec_a))
2635 {
2636 case POLYNOMIAL_CHREC:
2637 switch (TREE_CODE (chrec_b))
2638 {
2639 case POLYNOMIAL_CHREC:
2640 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2641 return false;
2642 /* FALLTHRU */
2643
2644 default:
2645 return true;
2646 }
2647
2648 default:
2649 return true;
2650 }
2651 }
2652
2653 return false;
2654 }
2655
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow as variadic arguments; ownership of them
   passes to the returned conflict function.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  /* N must fit within the fixed-size fns array.  */
  gcc_assert (0 < n && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}
2676
2677 /* Returns constant affine function with value CST. */
2678
2679 static affine_fn
2680 affine_fn_cst (tree cst)
2681 {
2682 affine_fn fn;
2683 fn.create (1);
2684 fn.quick_push (cst);
2685 return fn;
2686 }
2687
2688 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2689
2690 static affine_fn
2691 affine_fn_univar (tree cst, unsigned dim, tree coef)
2692 {
2693 affine_fn fn;
2694 fn.create (dim + 1);
2695 unsigned i;
2696
2697 gcc_assert (dim > 0);
2698 fn.quick_push (cst);
2699 for (i = 1; i < dim; i++)
2700 fn.quick_push (integer_zero_node);
2701 fn.quick_push (coef);
2702 return fn;
2703 }
2704
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  /* Convert both chrecs to a common signed type before subtracting.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
2768
2769 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2770 and only if it fits to the int type. If this is not the case, or the
2771 bound on the number of iterations of LOOP could not be derived, returns
2772 chrec_dont_know. */
2773
2774 static tree
2775 max_stmt_executions_tree (struct loop *loop)
2776 {
2777 widest_int nit;
2778
2779 if (!max_stmt_executions (loop, &nit))
2780 return chrec_dont_know;
2781
2782 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2783 return chrec_dont_know;
2784
2785 return wide_int_to_tree (unsigned_type_node, nit);
2786 }
2787
2788 /* Determine whether the CHREC is always positive/negative. If the expression
2789 cannot be statically analyzed, return false, otherwise set the answer into
2790 VALUE. */
2791
2792 static bool
2793 chrec_is_positive (tree chrec, bool *value)
2794 {
2795 bool value0, value1, value2;
2796 tree end_value, nb_iter;
2797
2798 switch (TREE_CODE (chrec))
2799 {
2800 case POLYNOMIAL_CHREC:
2801 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2802 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2803 return false;
2804
2805 /* FIXME -- overflows. */
2806 if (value0 == value1)
2807 {
2808 *value = value0;
2809 return true;
2810 }
2811
2812 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2813 and the proof consists in showing that the sign never
2814 changes during the execution of the loop, from 0 to
2815 loop->nb_iterations. */
2816 if (!evolution_function_is_affine_p (chrec))
2817 return false;
2818
2819 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2820 if (chrec_contains_undetermined (nb_iter))
2821 return false;
2822
2823 #if 0
2824 /* TODO -- If the test is after the exit, we may decrease the number of
2825 iterations by one. */
2826 if (after_exit)
2827 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2828 #endif
2829
2830 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2831
2832 if (!chrec_is_positive (end_value, &value2))
2833 return false;
2834
2835 *value = value0;
2836 return value0 == value1;
2837
2838 case INTEGER_CST:
2839 switch (tree_int_cst_sgn (chrec))
2840 {
2841 case -1:
2842 *value = false;
2843 break;
2844 case 1:
2845 *value = true;
2846 break;
2847 default:
2848 return false;
2849 }
2850 return true;
2851
2852 default:
2853 return false;
2854 }
2855 }
2856
2857
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a common signed type; DIFFERENCE is the gap between the
     initial value of CHREC_B and the constant CHREC_A.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* DIFFERENCE is negative: CHREC_A is above the initial value
	     of CHREC_B, so an overlap needs a positive step.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* DIFFERENCE is positive: CHREC_A is below the initial value
	     of CHREC_B, so an overlap needs a negative step.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3063
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  Each polynomial chrec level contributes
   its step (times MULT) to row INDEX of A, with deeper levels filling
   successive rows.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Record the step in the matrix and recurse on the start value.  */
      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
3110
/* Truncating integer division.  NOTE(review): this is C division, not
   a mathematical floor — the two differ for negative operands; the
   uses below appear to pass non-negative values, confirm.  */
#define FLOOR_DIV(x,y) ((x) / (y))
3112
3113 /* Solves the special case of the Diophantine equation:
3114 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3115
3116 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3117 number of iterations that loops X and Y run. The overlaps will be
3118 constructed as evolutions in dimension DIM. */
3119
3120 static void
3121 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3122 HOST_WIDE_INT step_a,
3123 HOST_WIDE_INT step_b,
3124 affine_fn *overlaps_a,
3125 affine_fn *overlaps_b,
3126 tree *last_conflicts, int dim)
3127 {
3128 if (((step_a > 0 && step_b > 0)
3129 || (step_a < 0 && step_b < 0)))
3130 {
3131 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3132 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3133
3134 gcd_steps_a_b = gcd (step_a, step_b);
3135 step_overlaps_a = step_b / gcd_steps_a_b;
3136 step_overlaps_b = step_a / gcd_steps_a_b;
3137
3138 if (niter > 0)
3139 {
3140 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3141 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3142 last_conflict = tau2;
3143 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3144 }
3145 else
3146 *last_conflicts = chrec_dont_know;
3147
3148 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3149 build_int_cst (NULL_TREE,
3150 step_overlaps_a));
3151 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3152 build_int_cst (NULL_TREE,
3153 step_overlaps_b));
3154 }
3155
3156 else
3157 {
3158 *overlaps_a = affine_fn_cst (integer_zero_node);
3159 *overlaps_b = affine_fn_cst (integer_zero_node);
3160 *last_conflicts = integer_zero_node;
3161 }
3162 }
3163
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* x is the inner dimension of the bivariate CHREC_A, y its outer
     dimension, and z the single dimension of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without known iteration counts the univariate solver below
     cannot bound the conflicts: answer "don't know".  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve three univariate sub-problems: x against z (dimension 1),
     y against z (dimension 2), and the combined step x+y against z
     (dimension 3).  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  /* A sub-problem contributes iff it found at least one conflict.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the contributing sub-solutions into OVA1/OVA2/OVB.  Each
	 affine_fn_plus allocates a new function, so the previous one
	 is saved in a temporary and freed after the addition.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      /* No conflicts at all.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied into the sums above; release them.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3292
3293 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3294
3295 static void
3296 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3297 int size)
3298 {
3299 memcpy (vec2, vec1, size * sizeof (*vec1));
3300 }
3301
3302 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3303
3304 static void
3305 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3306 int m, int n)
3307 {
3308 int i;
3309
3310 for (i = 0; i < m; i++)
3311 lambda_vector_copy (mat1[i], mat2[i], n);
3312 }
3313
3314 /* Store the N x N identity matrix in MAT. */
3315
3316 static void
3317 lambda_matrix_id (lambda_matrix mat, int size)
3318 {
3319 int i, j;
3320
3321 for (i = 0; i < size; i++)
3322 for (j = 0; j < size; j++)
3323 mat[i][j] = (i == j) ? 1 : 0;
3324 }
3325
3326 /* Return the first nonzero element of vector VEC1 between START and N.
3327 We must have START <= N. Returns N if VEC1 is the zero vector. */
3328
3329 static int
3330 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3331 {
3332 int j = start;
3333 while (j < n && vec1[j] == 0)
3334 j++;
3335 return j;
3336 }
3337
3338 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3339 R2 = R2 + CONST1 * R1. */
3340
3341 static void
3342 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3343 {
3344 int i;
3345
3346 if (const1 == 0)
3347 return;
3348
3349 for (i = 0; i < n; i++)
3350 mat[r2][i] += const1 * mat[r1][i];
3351 }
3352
3353 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3354 and store the result in VEC2. */
3355
3356 static void
3357 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3358 int size, int const1)
3359 {
3360 int i;
3361
3362 if (const1 == 0)
3363 lambda_vector_clear (vec2, size);
3364 else
3365 for (i = 0; i < size; i++)
3366 vec2[i] = const1 * vec1[i];
3367 }
3368
3369 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3370
3371 static void
3372 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3373 int size)
3374 {
3375 lambda_vector_mult_const (vec1, vec2, size, -1);
3376 }
3377
3378 /* Negate row R1 of matrix MAT which has N columns. */
3379
3380 static void
3381 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3382 {
3383 lambda_vector_negate (mat[r1], mat[r1], n);
3384 }
3385
3386 /* Return true if two vectors are equal. */
3387
3388 static bool
3389 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3390 {
3391 int i;
3392 for (i = 0; i < size; i++)
3393 if (vec1[i] != vec2[i])
3394 return false;
3395 return true;
3396 }
3397
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both so that the invariant U.A = S is preserved.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate the entries below the pivot with a
	     Euclidean-style reduction: subtract the quotient times
	     the row above, swap the two rows, and repeat until
	     S[i][j] becomes zero.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* SIGMA carries the sign so that FACTOR below is
		     the signed quotient of a by b.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs (a);
		  b = abs (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
3442
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  /* The steps of both chrecs are gathered in the single column
     vector A: CHREC_A's rows are entered with multiplier 1 and
     CHREC_B's with -1, so that a solution of A.t = gamma expresses
     "chrec_a (x) = chrec_b (y)".  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
  init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
  gamma = init_b - init_a;

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading coefficient of S is positive; U is
     negated accordingly to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
					    FLOOR_DIV (niter_b - j0, j1));
		  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;

		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }
		  else
		    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
		}
	      else
		/* A solution exists but the iteration count is
		   unknown, so the number of conflicts cannot be
		   bounded.  */
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3713
3714 /* Returns true when analyze_subscript_affine_affine can be used for
3715 determining the dependence relation between chrec_a and chrec_b,
3716 that contain symbols. This function modifies chrec_a and chrec_b
3717 such that the analysis result is the same, and such that they don't
3718 contain symbols, and then can safely be passed to the analyzer.
3719
3720 Example: The analysis of the following tuples of evolutions produce
3721 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3722 vs. {0, +, 1}_1
3723
3724 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3725 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3726 */
3727
3728 static bool
3729 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3730 {
3731 tree diff, type, left_a, left_b, right_b;
3732
3733 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3734 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3735 /* FIXME: For the moment not handled. Might be refined later. */
3736 return false;
3737
3738 type = chrec_type (*chrec_a);
3739 left_a = CHREC_LEFT (*chrec_a);
3740 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3741 diff = chrec_fold_minus (type, left_a, left_b);
3742
3743 if (!evolution_function_is_constant_p (diff))
3744 return false;
3745
3746 if (dump_file && (dump_flags & TDF_DETAILS))
3747 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3748
3749 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3750 diff, CHREC_RIGHT (*chrec_a));
3751 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3752 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3753 build_int_cst (type, 0),
3754 right_b);
3755 return true;
3756 }
3757
3758 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3759 *OVERLAPS_B are initialized to the functions that describe the
3760 relation between the elements accessed twice by CHREC_A and
3761 CHREC_B. For k >= 0, the following property is verified:
3762
3763 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3764
3765 static void
3766 analyze_siv_subscript (tree chrec_a,
3767 tree chrec_b,
3768 conflict_function **overlaps_a,
3769 conflict_function **overlaps_b,
3770 tree *last_conflicts,
3771 int loop_nest_num)
3772 {
3773 dependence_stats.num_siv++;
3774
3775 if (dump_file && (dump_flags & TDF_DETAILS))
3776 fprintf (dump_file, "(analyze_siv_subscript \n");
3777
3778 if (evolution_function_is_constant_p (chrec_a)
3779 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3780 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3781 overlaps_a, overlaps_b, last_conflicts);
3782
3783 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3784 && evolution_function_is_constant_p (chrec_b))
3785 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3786 overlaps_b, overlaps_a, last_conflicts);
3787
3788 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3789 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3790 {
3791 if (!chrec_contains_symbols (chrec_a)
3792 && !chrec_contains_symbols (chrec_b))
3793 {
3794 analyze_subscript_affine_affine (chrec_a, chrec_b,
3795 overlaps_a, overlaps_b,
3796 last_conflicts);
3797
3798 if (CF_NOT_KNOWN_P (*overlaps_a)
3799 || CF_NOT_KNOWN_P (*overlaps_b))
3800 dependence_stats.num_siv_unimplemented++;
3801 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3802 || CF_NO_DEPENDENCE_P (*overlaps_b))
3803 dependence_stats.num_siv_independent++;
3804 else
3805 dependence_stats.num_siv_dependent++;
3806 }
3807 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3808 &chrec_b))
3809 {
3810 analyze_subscript_affine_affine (chrec_a, chrec_b,
3811 overlaps_a, overlaps_b,
3812 last_conflicts);
3813
3814 if (CF_NOT_KNOWN_P (*overlaps_a)
3815 || CF_NOT_KNOWN_P (*overlaps_b))
3816 dependence_stats.num_siv_unimplemented++;
3817 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3818 || CF_NO_DEPENDENCE_P (*overlaps_b))
3819 dependence_stats.num_siv_independent++;
3820 else
3821 dependence_stats.num_siv_dependent++;
3822 }
3823 else
3824 goto siv_subscript_dontknow;
3825 }
3826
3827 else
3828 {
3829 siv_subscript_dontknow:;
3830 if (dump_file && (dump_flags & TDF_DETAILS))
3831 fprintf (dump_file, " siv test failed: unimplemented");
3832 *overlaps_a = conflict_fn_not_known ();
3833 *overlaps_b = conflict_fn_not_known ();
3834 *last_conflicts = chrec_dont_know;
3835 dependence_stats.num_siv_unimplemented++;
3836 }
3837
3838 if (dump_file && (dump_flags & TDF_DETAILS))
3839 fprintf (dump_file, ")\n");
3840 }
3841
3842 /* Returns false if we can prove that the greatest common divisor of the steps
3843 of CHREC does not divide CST, false otherwise. */
3844
3845 static bool
3846 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3847 {
3848 HOST_WIDE_INT cd = 0, val;
3849 tree step;
3850
3851 if (!tree_fits_shwi_p (cst))
3852 return true;
3853 val = tree_to_shwi (cst);
3854
3855 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3856 {
3857 step = CHREC_RIGHT (chrec);
3858 if (!tree_fits_shwi_p (step))
3859 return true;
3860 cd = gcd (cd, tree_to_shwi (step));
3861 chrec = CHREC_LEFT (chrec);
3862 }
3863
3864 return val % cd == 0;
3865 }
3866
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       struct loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Convert both chrecs to a common signed type so that their
     difference below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   /* For the moment, the following is verified:
	      evolution_function_is_affine_multivariate_p (chrec_a,
	      loop_nest->num) */
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a)
	   && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2 vs. {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      /* Classify the result for the statistics counters.  */
      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
3968
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, struct loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, " (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Missing or undetermined chrecs: nothing can be said.
     NOTE(review): *last_conflicts is left untouched on this path;
     presumably callers initialize it — confirm.  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the number of index variables involved: zero (ZIV),
     one (SIV), more (MIV).  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4059
4060 /* Helper function for uniquely inserting distance vectors. */
4061
4062 static void
4063 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4064 {
4065 unsigned i;
4066 lambda_vector v;
4067
4068 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4069 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4070 return;
4071
4072 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4073 }
4074
4075 /* Helper function for uniquely inserting direction vectors. */
4076
4077 static void
4078 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4079 {
4080 unsigned i;
4081 lambda_vector v;
4082
4083 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4084 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4085 return;
4086
4087 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4088 }
4089
4090 /* Add a distance of 1 on all the loops outer than INDEX. If we
4091 haven't yet determined a distance for this outer loop, push a new
4092 distance vector composed of the previous distance, and a distance
4093 of 1 for this outer loop. Example:
4094
4095 | loop_1
4096 | loop_2
4097 | A[10]
4098 | endloop_2
4099 | endloop_1
4100
4101 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4102 save (0, 1), then we have to save (1, 0). */
4103
4104 static void
4105 add_outer_distances (struct data_dependence_relation *ddr,
4106 lambda_vector dist_v, int index)
4107 {
4108 /* For each outer loop where init_v is not set, the accesses are
4109 in dependence of distance 1 in the loop. */
4110 while (--index >= 0)
4111 {
4112 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4113 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4114 save_v[index] = 1;
4115 save_dist_v (ddr, save_v);
4116 }
4117 }
4118
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V[k] flags whether DIST_V[k] has been set by an earlier
     subscript; used by the coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* An undetermined per-subscript distance means the dependence
	 cannot be represented as a distance vector at all.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* The two chrecs must evolve in the same loop for the
	     distance to be meaningful.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  /* Track the outermost loop index that carries a component
	     of the dependence.  */
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      /* Conflicting distances from two subscripts: the
		 references are independent.  */
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4200
4201 /* Return true when the DDR contains only constant access functions. */
4202
4203 static bool
4204 constant_access_functions (const struct data_dependence_relation *ddr)
4205 {
4206 unsigned i;
4207 subscript *sub;
4208
4209 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4210 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4211 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4212 return false;
4213
4214 return true;
4215 }
4216
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a two-level polynomial chrec of the form
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Loop-nest indices of the two chrec variables.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the steps by their gcd to get the smallest integral
     distance vector.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      /* Normalize the sign so that the component stored at X_1 is
	 positive.  */
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
4264
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-dependence distance vectors
   for DDR.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* Both sides are equal, so inspecting side 0 is enough.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun))
	    {
	      /* Multivariate chrecs are only handled for single
		 subscript accesses.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* Track the outermost loop in which an access function
	     evolves.  */
	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* All loops outer than INDEX_CARRY carry a distance of 1.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4313
4314 static void
4315 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4316 {
4317 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4318
4319 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4320 save_dist_v (ddr, dist_v);
4321 }
4322
4323 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4324 is the case for example when access functions are the same and
4325 equal to a constant, as in:
4326
4327 | loop_1
4328 | A[3] = ...
4329 | ... = A[3]
4330 | endloop_1
4331
4332 in which case the distance vectors are (0) and (1). */
4333
4334 static void
4335 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4336 {
4337 unsigned i, j;
4338
4339 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4340 {
4341 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4342 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4343 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4344
4345 for (j = 0; j < ca->n; j++)
4346 if (affine_function_zero_p (ca->fns[j]))
4347 {
4348 insert_innermost_unit_dist_vector (ddr);
4349 return;
4350 }
4351
4352 for (j = 0; j < cb->n; j++)
4353 if (affine_function_zero_p (cb->fns[j]))
4354 {
4355 insert_innermost_unit_dist_vector (ddr);
4356 return;
4357 }
4358 }
4359 }
4360
4361 /* Return true when the DDR contains two data references that have the
4362 same access functions. */
4363
4364 static inline bool
4365 same_access_functions (const struct data_dependence_relation *ddr)
4366 {
4367 unsigned i;
4368 subscript *sub;
4369
4370 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4371 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4372 SUB_ACCESS_FN (sub, 1)))
4373 return false;
4374
4375 return true;
4376 }
4377
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation has already been decided.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Redo the dependence test with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      /* Also compute the distance for the opposite ordering
		 of the two references.  */
	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4528
4529 /* Return the direction for a given distance.
4530 FIXME: Computing dir this way is suboptimal, since dir can catch
4531 cases that dist is unable to represent. */
4532
4533 static inline enum data_dependence_direction
4534 dir_from_dist (int dist)
4535 {
4536 if (dist > 0)
4537 return dir_positive;
4538 else if (dist < 0)
4539 return dir_negative;
4540 else
4541 return dir_equal;
4542 }
4543
4544 /* Compute the classic per loop direction vector. DDR is the data
4545 dependence relation to build a vector from. */
4546
4547 static void
4548 build_classic_dir_vector (struct data_dependence_relation *ddr)
4549 {
4550 unsigned i, j;
4551 lambda_vector dist_v;
4552
4553 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4554 {
4555 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4556
4557 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4558 dir_v[j] = dir_from_dist (dist_v[j]);
4559
4560 save_dir_v (ddr, dir_v);
4561 }
4562 }
4563
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   As a side effect, stores the conflict functions and last conflict
   count into each subscript of DDR.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       struct loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE means "dependent so far"; chrec_known means proved
     independent; chrec_dont_know means undetermined.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous analysis
	 before storing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4625
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.
   On a proven dependence, also builds the classic distance and
   direction vectors for DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     struct loop *loop_nest)
{
  /* A true result means the two references depend on each other.  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Derive per-subscript distances, then the classic vectors.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4639
4640 /* Returns true when all the access functions of A are affine or
4641 constant with respect to LOOP_NEST. */
4642
4643 static bool
4644 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4645 const struct loop *loop_nest)
4646 {
4647 unsigned int i;
4648 vec<tree> fns = DR_ACCESS_FNS (a);
4649 tree t;
4650
4651 FOR_EACH_VEC_ELT (fns, i, t)
4652 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4653 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4654 return false;
4655
4656 return true;
4657 }
4658
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester requires affine or constant access
	 functions on both sides.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4722
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. data references number
   is small enough to be handled.  */

bool
compute_all_dependences (vec<data_reference_p> datarefs,
			 vec<ddr_p> *dependence_relations,
			 vec<loop_p> loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  /* Give up when the quadratic number of pairs would exceed the
     user-tunable limit.  */
  if ((int) datarefs.length ()
      > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  /* Analyze every unordered pair (a, b) with a preceding b.  */
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  /* Optionally add the self relations (a, a).  */
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
4772
/* Describes a location of a memory reference found in a GIMPLE
   statement, before it is turned into a full data_reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
4788
4789
4790 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4791 true if STMT clobbers memory, false otherwise. */
4792
4793 static bool
4794 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4795 {
4796 bool clobbers_memory = false;
4797 data_ref_loc ref;
4798 tree op0, op1;
4799 enum gimple_code stmt_code = gimple_code (stmt);
4800
4801 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4802 As we cannot model data-references to not spelled out
4803 accesses give up if they may occur. */
4804 if (stmt_code == GIMPLE_CALL
4805 && !(gimple_call_flags (stmt) & ECF_CONST))
4806 {
4807 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4808 if (gimple_call_internal_p (stmt))
4809 switch (gimple_call_internal_fn (stmt))
4810 {
4811 case IFN_GOMP_SIMD_LANE:
4812 {
4813 struct loop *loop = gimple_bb (stmt)->loop_father;
4814 tree uid = gimple_call_arg (stmt, 0);
4815 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4816 if (loop == NULL
4817 || loop->simduid != SSA_NAME_VAR (uid))
4818 clobbers_memory = true;
4819 break;
4820 }
4821 case IFN_MASK_LOAD:
4822 case IFN_MASK_STORE:
4823 break;
4824 default:
4825 clobbers_memory = true;
4826 break;
4827 }
4828 else
4829 clobbers_memory = true;
4830 }
4831 else if (stmt_code == GIMPLE_ASM
4832 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4833 || gimple_vuse (stmt)))
4834 clobbers_memory = true;
4835
4836 if (!gimple_vuse (stmt))
4837 return clobbers_memory;
4838
4839 if (stmt_code == GIMPLE_ASSIGN)
4840 {
4841 tree base;
4842 op0 = gimple_assign_lhs (stmt);
4843 op1 = gimple_assign_rhs1 (stmt);
4844
4845 if (DECL_P (op1)
4846 || (REFERENCE_CLASS_P (op1)
4847 && (base = get_base_address (op1))
4848 && TREE_CODE (base) != SSA_NAME
4849 && !is_gimple_min_invariant (base)))
4850 {
4851 ref.ref = op1;
4852 ref.is_read = true;
4853 ref.is_conditional_in_stmt = false;
4854 references->safe_push (ref);
4855 }
4856 }
4857 else if (stmt_code == GIMPLE_CALL)
4858 {
4859 unsigned i, n;
4860 tree ptr, type;
4861 unsigned int align;
4862
4863 ref.is_read = false;
4864 if (gimple_call_internal_p (stmt))
4865 switch (gimple_call_internal_fn (stmt))
4866 {
4867 case IFN_MASK_LOAD:
4868 if (gimple_call_lhs (stmt) == NULL_TREE)
4869 break;
4870 ref.is_read = true;
4871 /* FALLTHRU */
4872 case IFN_MASK_STORE:
4873 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4874 align = tree_to_shwi (gimple_call_arg (stmt, 1));
4875 if (ref.is_read)
4876 type = TREE_TYPE (gimple_call_lhs (stmt));
4877 else
4878 type = TREE_TYPE (gimple_call_arg (stmt, 3));
4879 if (TYPE_ALIGN (type) != align)
4880 type = build_aligned_type (type, align);
4881 ref.is_conditional_in_stmt = true;
4882 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4883 ptr);
4884 references->safe_push (ref);
4885 return false;
4886 default:
4887 break;
4888 }
4889
4890 op0 = gimple_call_lhs (stmt);
4891 n = gimple_call_num_args (stmt);
4892 for (i = 0; i < n; i++)
4893 {
4894 op1 = gimple_call_arg (stmt, i);
4895
4896 if (DECL_P (op1)
4897 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4898 {
4899 ref.ref = op1;
4900 ref.is_read = true;
4901 ref.is_conditional_in_stmt = false;
4902 references->safe_push (ref);
4903 }
4904 }
4905 }
4906 else
4907 return clobbers_memory;
4908
4909 if (op0
4910 && (DECL_P (op0)
4911 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4912 {
4913 ref.ref = op0;
4914 ref.is_read = false;
4915 ref.is_conditional_in_stmt = false;
4916 references->safe_push (ref);
4917 }
4918 return clobbers_memory;
4919 }
4920
4921
4922 /* Returns true if the loop-nest has any data reference. */
4923
4924 bool
4925 loop_nest_has_data_refs (loop_p loop)
4926 {
4927 basic_block *bbs = get_loop_body (loop);
4928 auto_vec<data_ref_loc, 3> references;
4929
4930 for (unsigned i = 0; i < loop->num_nodes; i++)
4931 {
4932 basic_block bb = bbs[i];
4933 gimple_stmt_iterator bsi;
4934
4935 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4936 {
4937 gimple *stmt = gsi_stmt (bsi);
4938 get_references_in_stmt (stmt, &references);
4939 if (references.length ())
4940 {
4941 free (bbs);
4942 return true;
4943 }
4944 }
4945 }
4946 free (bbs);
4947
4948 if (loop->inner)
4949 {
4950 loop = loop->inner;
4951 while (loop)
4952 {
4953 if (loop_nest_has_data_refs (loop))
4954 return true;
4955 loop = loop->next;
4956 }
4957 }
4958 return false;
4959 }
4960
4961 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4962 reference, returns false, otherwise returns true. NEST is the outermost
4963 loop of the loop nest in which the references should be analyzed. */
4964
4965 bool
4966 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4967 vec<data_reference_p> *datarefs)
4968 {
4969 unsigned i;
4970 auto_vec<data_ref_loc, 2> references;
4971 data_ref_loc *ref;
4972 bool ret = true;
4973 data_reference_p dr;
4974
4975 if (get_references_in_stmt (stmt, &references))
4976 return false;
4977
4978 FOR_EACH_VEC_ELT (references, i, ref)
4979 {
4980 dr = create_data_ref (nest, loop_containing_stmt (stmt), ref->ref,
4981 stmt, ref->is_read, ref->is_conditional_in_stmt);
4982 gcc_assert (dr != NULL);
4983 datarefs->safe_push (dr);
4984 }
4985
4986 return ret;
4987 }
4988
4989 /* Stores the data references in STMT to DATAREFS. If there is an
4990 unanalyzable reference, returns false, otherwise returns true.
4991 NEST is the outermost loop of the loop nest in which the references
4992 should be instantiated, LOOP is the loop in which the references
4993 should be analyzed. */
4994
4995 bool
4996 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
4997 vec<data_reference_p> *datarefs)
4998 {
4999 unsigned i;
5000 auto_vec<data_ref_loc, 2> references;
5001 data_ref_loc *ref;
5002 bool ret = true;
5003 data_reference_p dr;
5004
5005 if (get_references_in_stmt (stmt, &references))
5006 return false;
5007
5008 FOR_EACH_VEC_ELT (references, i, ref)
5009 {
5010 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5011 ref->is_conditional_in_stmt);
5012 gcc_assert (dr != NULL);
5013 datarefs->safe_push (dr);
5014 }
5015
5016 return ret;
5017 }
5018
5019 /* Search the data references in LOOP, and record the information into
5020 DATAREFS. Returns chrec_dont_know when failing to analyze a
5021 difficult case, returns NULL_TREE otherwise. */
5022
5023 tree
5024 find_data_references_in_bb (struct loop *loop, basic_block bb,
5025 vec<data_reference_p> *datarefs)
5026 {
5027 gimple_stmt_iterator bsi;
5028
5029 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5030 {
5031 gimple *stmt = gsi_stmt (bsi);
5032
5033 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5034 {
5035 struct data_reference *res;
5036 res = XCNEW (struct data_reference);
5037 datarefs->safe_push (res);
5038
5039 return chrec_dont_know;
5040 }
5041 }
5042
5043 return NULL_TREE;
5044 }
5045
5046 /* Search the data references in LOOP, and record the information into
5047 DATAREFS. Returns chrec_dont_know when failing to analyze a
5048 difficult case, returns NULL_TREE otherwise.
5049
5050 TODO: This function should be made smarter so that it can handle address
5051 arithmetic as if they were array accesses, etc. */
5052
5053 tree
5054 find_data_references_in_loop (struct loop *loop,
5055 vec<data_reference_p> *datarefs)
5056 {
5057 basic_block bb, *bbs;
5058 unsigned int i;
5059
5060 bbs = get_loop_body_in_dom_order (loop);
5061
5062 for (i = 0; i < loop->num_nodes; i++)
5063 {
5064 bb = bbs[i];
5065
5066 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5067 {
5068 free (bbs);
5069 return chrec_dont_know;
5070 }
5071 }
5072 free (bbs);
5073
5074 return NULL_TREE;
5075 }
5076
5077 /* Return the alignment in bytes that DRB is guaranteed to have at all
5078 times. */
5079
5080 unsigned int
5081 dr_alignment (innermost_loop_behavior *drb)
5082 {
5083 /* Get the alignment of BASE_ADDRESS + INIT. */
5084 unsigned int alignment = drb->base_alignment;
5085 unsigned int misalignment = (drb->base_misalignment
5086 + TREE_INT_CST_LOW (drb->init));
5087 if (misalignment != 0)
5088 alignment = MIN (alignment, misalignment & -misalignment);
5089
5090 /* Cap it to the alignment of OFFSET. */
5091 if (!integer_zerop (drb->offset))
5092 alignment = MIN (alignment, drb->offset_alignment);
5093
5094 /* Cap it to the alignment of STEP. */
5095 if (!integer_zerop (drb->step))
5096 alignment = MIN (alignment, drb->step_alignment);
5097
5098 return alignment;
5099 }
5100
5101 /* Recursive helper function. */
5102
5103 static bool
5104 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5105 {
5106 /* Inner loops of the nest should not contain siblings. Example:
5107 when there are two consecutive loops,
5108
5109 | loop_0
5110 | loop_1
5111 | A[{0, +, 1}_1]
5112 | endloop_1
5113 | loop_2
5114 | A[{0, +, 1}_2]
5115 | endloop_2
5116 | endloop_0
5117
5118 the dependence relation cannot be captured by the distance
5119 abstraction. */
5120 if (loop->next)
5121 return false;
5122
5123 loop_nest->safe_push (loop);
5124 if (loop->inner)
5125 return find_loop_nest_1 (loop->inner, loop_nest);
5126 return true;
5127 }
5128
5129 /* Return false when the LOOP is not well nested. Otherwise return
5130 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5131 contain the loops from the outermost to the innermost, as they will
5132 appear in the classic distance vector. */
5133
5134 bool
5135 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5136 {
5137 loop_nest->safe_push (loop);
5138 if (loop->inner)
5139 return find_loop_nest_1 (loop->inner, loop_nest);
5140 return true;
5141 }
5142
5143 /* Returns true when the data dependences have been computed, false otherwise.
5144 Given a loop nest LOOP, the following vectors are returned:
5145 DATAREFS is initialized to all the array elements contained in this loop,
5146 DEPENDENCE_RELATIONS contains the relations between the data references.
5147 Compute read-read and self relations if
5148 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5149
5150 bool
5151 compute_data_dependences_for_loop (struct loop *loop,
5152 bool compute_self_and_read_read_dependences,
5153 vec<loop_p> *loop_nest,
5154 vec<data_reference_p> *datarefs,
5155 vec<ddr_p> *dependence_relations)
5156 {
5157 bool res = true;
5158
5159 memset (&dependence_stats, 0, sizeof (dependence_stats));
5160
5161 /* If the loop nest is not well formed, or one of the data references
5162 is not computable, give up without spending time to compute other
5163 dependences. */
5164 if (!loop
5165 || !find_loop_nest (loop, loop_nest)
5166 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5167 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5168 compute_self_and_read_read_dependences))
5169 res = false;
5170
5171 if (dump_file && (dump_flags & TDF_STATS))
5172 {
5173 fprintf (dump_file, "Dependence tester statistics:\n");
5174
5175 fprintf (dump_file, "Number of dependence tests: %d\n",
5176 dependence_stats.num_dependence_tests);
5177 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5178 dependence_stats.num_dependence_dependent);
5179 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5180 dependence_stats.num_dependence_independent);
5181 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5182 dependence_stats.num_dependence_undetermined);
5183
5184 fprintf (dump_file, "Number of subscript tests: %d\n",
5185 dependence_stats.num_subscript_tests);
5186 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5187 dependence_stats.num_subscript_undetermined);
5188 fprintf (dump_file, "Number of same subscript function: %d\n",
5189 dependence_stats.num_same_subscript_function);
5190
5191 fprintf (dump_file, "Number of ziv tests: %d\n",
5192 dependence_stats.num_ziv);
5193 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5194 dependence_stats.num_ziv_dependent);
5195 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5196 dependence_stats.num_ziv_independent);
5197 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5198 dependence_stats.num_ziv_unimplemented);
5199
5200 fprintf (dump_file, "Number of siv tests: %d\n",
5201 dependence_stats.num_siv);
5202 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5203 dependence_stats.num_siv_dependent);
5204 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5205 dependence_stats.num_siv_independent);
5206 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5207 dependence_stats.num_siv_unimplemented);
5208
5209 fprintf (dump_file, "Number of miv tests: %d\n",
5210 dependence_stats.num_miv);
5211 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5212 dependence_stats.num_miv_dependent);
5213 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5214 dependence_stats.num_miv_independent);
5215 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5216 dependence_stats.num_miv_unimplemented);
5217 }
5218
5219 return res;
5220 }
5221
5222 /* Free the memory used by a data dependence relation DDR. */
5223
5224 void
5225 free_dependence_relation (struct data_dependence_relation *ddr)
5226 {
5227 if (ddr == NULL)
5228 return;
5229
5230 if (DDR_SUBSCRIPTS (ddr).exists ())
5231 free_subscripts (DDR_SUBSCRIPTS (ddr));
5232 DDR_DIST_VECTS (ddr).release ();
5233 DDR_DIR_VECTS (ddr).release ();
5234
5235 free (ddr);
5236 }
5237
5238 /* Free the memory used by the data dependence relations from
5239 DEPENDENCE_RELATIONS. */
5240
5241 void
5242 free_dependence_relations (vec<ddr_p> dependence_relations)
5243 {
5244 unsigned int i;
5245 struct data_dependence_relation *ddr;
5246
5247 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5248 if (ddr)
5249 free_dependence_relation (ddr);
5250
5251 dependence_relations.release ();
5252 }
5253
5254 /* Free the memory used by the data references from DATAREFS. */
5255
5256 void
5257 free_data_refs (vec<data_reference_p> datarefs)
5258 {
5259 unsigned int i;
5260 struct data_reference *dr;
5261
5262 FOR_EACH_VEC_ELT (datarefs, i, dr)
5263 free_data_ref (dr);
5264 datarefs.release ();
5265 }