/* (Editorial note: removed gitweb navigation text ("git.ipfire.org ...
   gcc/tree-data-ref.c") that is not part of the source file and would
   not compile.)  */
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99
/* Counters summarizing the outcomes of the dependence tests performed
   in this file; collected for statistics dumps.  */

static struct datadep_stats
{
  /* Whole dependence-relation test outcomes.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript test outcomes.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variable) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
126
127 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
128 unsigned int, unsigned int,
129 class loop *);
/* Returns true iff A divides B.  Both A and B must be INTEGER_CST
   nodes (asserted); the check computes a truncating modulo and tests
   it for zero.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
139
/* Returns true iff A divides B evenly.  A must be nonzero (a zero
   divisor would be undefined behavior, as in the C '%' operator).  */

static inline bool
int_divides_p (int a, int b)
{
  int remainder = b % a;
  return remainder == 0;
}
147
/* Return true if reference REF contains a union access.  Walks the
   chain of handled components outward from REF, checking whether any
   enclosing object has union (or qualified-union) type.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}
162
163 \f
164
165 /* Dump into FILE all the data references from DATAREFS. */
166
167 static void
168 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 {
170 unsigned int i;
171 struct data_reference *dr;
172
173 FOR_EACH_VEC_ELT (datarefs, i, dr)
174 dump_data_reference (file, dr);
175 }
176
/* Unified debug helper: dump to stderr all the data references
   in REF.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
184
185 DEBUG_FUNCTION void
186 debug (vec<data_reference_p> *ptr)
187 {
188 if (ptr)
189 debug (*ptr);
190 else
191 fprintf (stderr, "<nil>\n");
192 }
193
194
/* Dump into STDERR all the data references from DATAREFS.
   Convenience wrapper around dump_data_references.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
202
/* Print to STDERR the data_reference DR.  Convenience wrapper around
   dump_data_reference.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
210
/* Dump function for a DATA_REFERENCE structure.  Prints to OUTF the
   containing basic block, the statement, the reference tree, the base
   object, and every recorded access function of DR.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "# stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "# ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "# base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  /* One access function per analyzed dimension.  */
  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "# Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}
235
/* Unified debug helper: dump to stderr the DATA_REFERENCE REF.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
243
244 DEBUG_FUNCTION void
245 debug (data_reference *ptr)
246 {
247 if (ptr)
248 debug (*ptr);
249 else
250 fprintf (stderr, "<nil>\n");
251 }
252
253
/* Dumps the affine function described by FN to the file OUTF, in the
   form FN[0] + FN[1] * x_1 + FN[2] * x_2 + ...; FN[0] is the constant
   term.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}
270
271 /* Dumps the conflict function CF to the file OUTF. */
272
273 DEBUG_FUNCTION void
274 dump_conflict_function (FILE *outf, conflict_function *cf)
275 {
276 unsigned i;
277
278 if (cf->n == NO_DEPENDENCE)
279 fprintf (outf, "no dependence");
280 else if (cf->n == NOT_KNOWN)
281 fprintf (outf, "not known");
282 else
283 {
284 for (i = 0; i < cf->n; i++)
285 {
286 if (i != 0)
287 fprintf (outf, " ");
288 fprintf (outf, "[");
289 dump_affine_function (outf, cf->fns[i]);
290 fprintf (outf, "]");
291 }
292 }
293 }
294
/* Dump function for a SUBSCRIPT structure.  Prints the conflicting
   iterations on both the A and B sides, the last conflicting
   iteration where a nontrivial conflict function is known, and the
   subscript distance.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      /* The subscript stores a single last-conflict iteration; the
	 same value is printed for both the A and B sides.  */
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}
326
/* Print the classic direction vector DIRV of LENGTH entries to OUTF.
   Each entry is rendered as its usual glyph: "+", "-", "=", "+=",
   "+-", "-=", "*", or "indep" for anything else.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}
371
372 /* Print a vector of direction vectors. */
373
374 DEBUG_FUNCTION void
375 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
376 int length)
377 {
378 unsigned j;
379 lambda_vector v;
380
381 FOR_EACH_VEC_ELT (dir_vects, j, v)
382 print_direction_vector (outf, v, length);
383 }
384
385 /* Print out a vector VEC of length N to OUTFILE. */
386
387 DEBUG_FUNCTION void
388 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 {
390 int i;
391
392 for (i = 0; i < n; i++)
393 fprintf (outfile, "%3d ", (int)vector[i]);
394 fprintf (outfile, "\n");
395 }
396
397 /* Print a vector of distance vectors. */
398
399 DEBUG_FUNCTION void
400 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 int length)
402 {
403 unsigned j;
404 lambda_vector v;
405
406 FOR_EACH_VEC_ELT (dist_vects, j, v)
407 print_lambda_vector (outf, v, length);
408 }
409
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Three
   outcomes are distinguished: DDR_ARE_DEPENDENT == chrec_dont_know
   (or null DDR) prints "(don't know)"; == chrec_known prints
   "(no dependence)"; == NULL_TREE means the relation was analyzed,
   so the subscripts, loop nest and distance/direction vectors are
   printed.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* Undetermined relation: dump whatever references are available.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      /* Dump each subscript with both of its access functions.  */
      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      /* The analyzed loop nest, by loop number.  */
      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
484
/* Debug version: dump the data dependence relation DDR to stderr.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
492
493 /* Dump into FILE all the dependence relations from DDRS. */
494
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
498 {
499 unsigned int i;
500 struct data_dependence_relation *ddr;
501
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
504 }
505
/* Unified debug helper: dump to stderr all the dependence relations
   in REF.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
511
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
514 {
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
519 }
520
521
/* Dump to STDERR all the dependence relations from DDRS.
   Convenience wrapper around dump_data_dependence_relations.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
529
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the size of each dumped vector is the
   number of loops in the considered nest (DDR_NB_LOOPS).  Only
   affine relations that may be dependent are dumped.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  unsigned int i, j;
  struct data_dependence_relation *ddr;
  lambda_vector v;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}
562
563 /* Dumps the data dependence relations DDRS in FILE. */
564
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
567 {
568 unsigned int i;
569 struct data_dependence_relation *ddr;
570
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
573
574 fprintf (file, "\n\n");
575 }
576
/* Dump the data dependence relations DDRS to stderr.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
582
583 static void
584 split_constant_offset (tree exp, tree *var, tree *off,
585 hash_map<tree, std::pair<tree, tree> > &cache,
586 unsigned *limit);
587
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.

   CACHE memoizes {VAR, OFF} results for multi-use SSA names; LIMIT
   bounds the number of SSA definitions followed recursively.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is all offset: VAR becomes zero of TYPE.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* Pointer arithmetic is combined as a PLUS on the offsets.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      if (TREE_CODE (op1) == INTEGER_CST)
	{
	  /* VAR +- CST: split the variable part and fold CST into OFF.  */
	  split_constant_offset (op0, &var0, &off0, cache, limit);
	  *var = var0;
	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
	  return true;
	}
      /* Split both operands and recombine their variable parts.  */
      split_constant_offset (op0, &var0, &off0, cache, limit);
      split_constant_offset (op1, &var1, &off1, cache, limit);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplication by a constant distributes over the split.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, cache, limit);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	/* Decompose &REF into an inner base plus bit position and
	   variable offset.  */
	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt unless the bit position is a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    /* Fold any constant component of the variable offset into
	       OFF0 and re-attach the rest to the base address.  */
	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A cached zero offset marks a name we are currently
		   processing (or have failed on): fail to avoid cycles.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		return true;
	      }
	    /* Seed the entry with a zero offset as an in-progress marker.  */
	    e = std::make_pair (op0, ssize_int (0));
	  }

	/* Bound the length of the definition chain we follow.  */
	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	return res;
      }
    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
	      {
		/* Split the unconverted operand and try to prove that
		   wrapping isn't a problem.  */
		tree tmp_var, tmp_off;
		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);

		/* See whether we have an SSA_NAME whose range is known
		   to be [A, B].  */
		if (TREE_CODE (tmp_var) != SSA_NAME)
		  return false;
		wide_int var_min, var_max;
		value_range_kind vr_type = get_range_info (tmp_var, &var_min,
							   &var_max);
		wide_int var_nonzero = get_nonzero_bits (tmp_var);
		signop sgn = TYPE_SIGN (itype);
		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
						       &var_max, var_nonzero,
						       sgn) != VR_RANGE)
		  return false;

		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
		   operations done in ITYPE.  The addition must overflow
		   at both ends of the range or at neither.  */
		wi::overflow_type overflow[2];
		unsigned int prec = TYPE_PRECISION (itype);
		wide_int woff = wi::to_wide (tmp_off, prec);
		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
		wi::add (var_max, woff, sgn, &overflow[1]);
		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
		  return false;

		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
		widest_int diff = (widest_int::from (op0_min, sgn)
				   - widest_int::from (var_min, sgn));
		var0 = tmp_var;
		*off = wide_int_to_tree (ssizetype, diff);
	      }
	    else
	      split_constant_offset (op0, &var0, off, cache, limit);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
807
/* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
   will be ssizetype.  On failure to split, *VAR is EXP itself and *OFF
   is zero.  CACHE and LIMIT are passed through to
   split_constant_offset_1.  */

static void
split_constant_offset (tree exp, tree *var, tree *off,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1, e, o;
  enum tree_code code;

  /* Default result: no split.  */
  *var = exp;
  *off = ssize_int (0);

  /* Chrecs and ternary RHS forms are not handled by the helper.  */
  if (tree_is_chrec (exp)
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
    return;

  code = TREE_CODE (exp);
  extract_ops_from_tree (exp, &code, &op0, &op1);
  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
    {
      *var = e;
      *off = o;
    }
}
834
/* Public entry point: expresses EXP as VAR + OFF, where OFF is a
   constant of type ssizetype.  The walk through SSA definitions is
   bounded by param_ssa_name_def_chain_limit.  A function-local cache
   is lazily allocated once and emptied (but kept allocated) after
   each call.  NOTE (review): the static cache makes this routine
   non-reentrant; presumably fine under GCC's single-threaded pass
   model -- confirm before calling from threaded code.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, *cache, &limit);
  cache->empty ();
}
845
/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  /* STRIP_NOPS modifies ADDR in place; ORIG keeps the original for
     the integer-cast case below.  */
  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  /* Rebuild the ADDR_EXPR so its type is pointer-to-object.  */
  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
866
867 /* Analyze the behavior of memory reference REF within STMT.
868 There are two modes:
869
870 - BB analysis. In this case we simply split the address into base,
871 init and offset components, without reference to any containing loop.
872 The resulting base and offset are general expressions and they can
873 vary arbitrarily from one iteration of the containing loop to the next.
874 The step is always zero.
875
876 - loop analysis. In this case we analyze the reference both wrt LOOP
877 and on the basis that the reference occurs (is "used") in LOOP;
878 see the comment above analyze_scalar_evolution_in_loop for more
879 information about this distinction. The base, init, offset and
880 step fields are all invariant in LOOP.
881
882 Perform BB analysis if LOOP is null, or if LOOP is the function's
883 dummy outermost loop. In other cases perform loop analysis.
884
885 Return true if the analysis succeeded and store the results in DRB if so.
886 BB analysis can only fail for bitfield or reversed-storage accesses. */
887
888 opt_result
889 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
890 class loop *loop, const gimple *stmt)
891 {
892 poly_int64 pbitsize, pbitpos;
893 tree base, poffset;
894 machine_mode pmode;
895 int punsignedp, preversep, pvolatilep;
896 affine_iv base_iv, offset_iv;
897 tree init, dinit, step;
898 bool in_loop = (loop && loop->num);
899
900 if (dump_file && (dump_flags & TDF_DETAILS))
901 fprintf (dump_file, "analyze_innermost: ");
902
903 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
904 &punsignedp, &preversep, &pvolatilep);
905 gcc_assert (base != NULL_TREE);
906
907 poly_int64 pbytepos;
908 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
909 return opt_result::failure_at (stmt,
910 "failed: bit offset alignment.\n");
911
912 if (preversep)
913 return opt_result::failure_at (stmt,
914 "failed: reverse storage order.\n");
915
916 /* Calculate the alignment and misalignment for the inner reference. */
917 unsigned int HOST_WIDE_INT bit_base_misalignment;
918 unsigned int bit_base_alignment;
919 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
920
921 /* There are no bitfield references remaining in BASE, so the values
922 we got back must be whole bytes. */
923 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
924 && bit_base_misalignment % BITS_PER_UNIT == 0);
925 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
926 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
927
928 if (TREE_CODE (base) == MEM_REF)
929 {
930 if (!integer_zerop (TREE_OPERAND (base, 1)))
931 {
932 /* Subtract MOFF from the base and add it to POFFSET instead.
933 Adjust the misalignment to reflect the amount we subtracted. */
934 poly_offset_int moff = mem_ref_offset (base);
935 base_misalignment -= moff.force_shwi ();
936 tree mofft = wide_int_to_tree (sizetype, moff);
937 if (!poffset)
938 poffset = mofft;
939 else
940 poffset = size_binop (PLUS_EXPR, poffset, mofft);
941 }
942 base = TREE_OPERAND (base, 0);
943 }
944 else
945 base = build_fold_addr_expr (base);
946
947 if (in_loop)
948 {
949 if (!simple_iv (loop, loop, base, &base_iv, true))
950 return opt_result::failure_at
951 (stmt, "failed: evolution of base is not affine.\n");
952 }
953 else
954 {
955 base_iv.base = base;
956 base_iv.step = ssize_int (0);
957 base_iv.no_overflow = true;
958 }
959
960 if (!poffset)
961 {
962 offset_iv.base = ssize_int (0);
963 offset_iv.step = ssize_int (0);
964 }
965 else
966 {
967 if (!in_loop)
968 {
969 offset_iv.base = poffset;
970 offset_iv.step = ssize_int (0);
971 }
972 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
973 return opt_result::failure_at
974 (stmt, "failed: evolution of offset is not affine.\n");
975 }
976
977 init = ssize_int (pbytepos);
978
979 /* Subtract any constant component from the base and add it to INIT instead.
980 Adjust the misalignment to reflect the amount we subtracted. */
981 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
982 init = size_binop (PLUS_EXPR, init, dinit);
983 base_misalignment -= TREE_INT_CST_LOW (dinit);
984
985 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
986 init = size_binop (PLUS_EXPR, init, dinit);
987
988 step = size_binop (PLUS_EXPR,
989 fold_convert (ssizetype, base_iv.step),
990 fold_convert (ssizetype, offset_iv.step));
991
992 base = canonicalize_base_object_address (base_iv.base);
993
994 /* See if get_pointer_alignment can guarantee a higher alignment than
995 the one we calculated above. */
996 unsigned int HOST_WIDE_INT alt_misalignment;
997 unsigned int alt_alignment;
998 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
999
1000 /* As above, these values must be whole bytes. */
1001 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1002 && alt_misalignment % BITS_PER_UNIT == 0);
1003 alt_alignment /= BITS_PER_UNIT;
1004 alt_misalignment /= BITS_PER_UNIT;
1005
1006 if (base_alignment < alt_alignment)
1007 {
1008 base_alignment = alt_alignment;
1009 base_misalignment = alt_misalignment;
1010 }
1011
1012 drb->base_address = base;
1013 drb->offset = fold_convert (ssizetype, offset_iv.base);
1014 drb->init = init;
1015 drb->step = step;
1016 if (known_misalignment (base_misalignment, base_alignment,
1017 &drb->base_misalignment))
1018 drb->base_alignment = base_alignment;
1019 else
1020 {
1021 drb->base_alignment = known_alignment (base_misalignment);
1022 drb->base_misalignment = 0;
1023 }
1024 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1025 drb->step_alignment = highest_pow2_factor (step);
1026
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "success.\n");
1029
1030 return opt_result::success ();
1031 }
1032
1033 /* Return true if OP is a valid component reference for a DR access
1034 function. This accepts a subset of what handled_component_p accepts. */
1035
1036 static bool
1037 access_fn_component_p (tree op)
1038 {
1039 switch (TREE_CODE (op))
1040 {
1041 case REALPART_EXPR:
1042 case IMAGPART_EXPR:
1043 case ARRAY_REF:
1044 return true;
1045
1046 case COMPONENT_REF:
1047 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1048
1049 default:
1050 return false;
1051 }
1052 }
1053
1054 /* Determines the base object and the list of indices of memory reference
1055 DR, analyzed in LOOP and instantiated before NEST. */
1056
static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.
     Note the walk goes from the outermost component towards the base,
     so indices are pushed in that order.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.
	     NOTE: this OFF shadows the function-scope OFF above.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* At this point REF is the base object of the innermost handled
     component.  If the address operand of a MEM_REF base has an
     evolution in the analyzed nest, add it as an additional
     independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Split BASE into a constant part OFF and a variable part so
	     the constant part can be canonicalized below.  */
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence clique/base info of the original
	     reference on the rebuilt MEM_REF.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
1197
1198 /* Extracts the alias analysis information from the memory reference DR. */
1199
1200 static void
1201 dr_analyze_alias (struct data_reference *dr)
1202 {
1203 tree ref = DR_REF (dr);
1204 tree base = get_base_address (ref), addr;
1205
1206 if (INDIRECT_REF_P (base)
1207 || TREE_CODE (base) == MEM_REF)
1208 {
1209 addr = TREE_OPERAND (base, 0);
1210 if (TREE_CODE (addr) == SSA_NAME)
1211 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1212 }
1213 }
1214
1215 /* Frees data reference DR. */
1216
void
free_data_ref (data_reference_p dr)
{
  /* Release the access-function vector first; it is owned by the
     descriptor, which is freed right after.  */
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}
1223
1224 /* Analyze memory reference MEMREF, which is accessed in STMT.
1225 The reference is a read if IS_READ is true, otherwise it is a write.
1226 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1227 within STMT, i.e. that it might not occur even if STMT is executed
1228 and runs to completion.
1229
1230 Return the data_reference description of MEMREF. NEST is the outermost
1231 loop in which the reference should be instantiated, LOOP is the loop
1232 in which the data reference should be analyzed. */
1233
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes the descriptor, so every field not set
     explicitly below starts out as 0/NULL/false.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* Fill in the innermost-loop behavior, the access functions and the
     alias information of the reference.  Without a NEST edge the
     innermost behavior is computed loop-invariantly.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1287
1288 /* A helper function computes order between two tree expressions T1 and T2.
1289 This is used in comparator functions sorting objects based on the order
1290 of tree expressions. The function returns -1, 0, or 1. */
1291
int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* Identical pointers compare equal; a NULL tree sorts before any
     non-NULL tree.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  /* Value-preserving conversions do not affect the ordering.  */
  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Trees with different codes are ordered by code, except that all
     conversion codes are treated as one equivalence class.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Shorter strings sort first; equal-length strings are ordered
	 by content.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  /* Falling out of the switch (via break) means the trees compare
     equal under this ordering.  */
  return 0;
}
1363
1364 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1365 check. */
1366
1367 opt_result
1368 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1369 {
1370 if (dump_enabled_p ())
1371 dump_printf (MSG_NOTE,
1372 "consider run-time aliasing test between %T and %T\n",
1373 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1374
1375 if (!speed_p)
1376 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1377 "runtime alias check not supported when"
1378 " optimizing for size.\n");
1379
1380 /* FORNOW: We don't support versioning with outer-loop in either
1381 vectorization or loop distribution. */
1382 if (loop != NULL && loop->inner != NULL)
1383 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1384 "runtime alias check not supported for"
1385 " outer loop.\n");
1386
1387 return opt_result::success ();
1388 }
1389
1390 /* Operator == between two dr_with_seg_len objects.
1391
1392 This equality operator is used to make sure two data refs
1393 are the same one so that we will consider to combine the
1394 aliasing checks of those two pairs of data dependent data
1395 refs. */
1396
1397 static bool
1398 operator == (const dr_with_seg_len& d1,
1399 const dr_with_seg_len& d2)
1400 {
1401 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1402 DR_BASE_ADDRESS (d2.dr), 0)
1403 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1404 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1405 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1406 && known_eq (d1.access_size, d2.access_size)
1407 && d1.align == d2.align);
1408 }
1409
1410 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1411 so that we can combine aliasing checks in one scan. */
1412
1413 static int
1414 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1415 {
1416 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1417 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1418 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1419 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1420
1421 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1422 if a and c have the same basic address snd step, and b and d have the same
1423 address and step. Therefore, if any a&c or b&d don't have the same address
1424 and step, we don't care the order of those two pairs after sorting. */
1425 int comp_res;
1426
1427 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1428 DR_BASE_ADDRESS (b1.dr))) != 0)
1429 return comp_res;
1430 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1431 DR_BASE_ADDRESS (b2.dr))) != 0)
1432 return comp_res;
1433 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1434 DR_STEP (b1.dr))) != 0)
1435 return comp_res;
1436 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1437 DR_STEP (b2.dr))) != 0)
1438 return comp_res;
1439 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1440 DR_OFFSET (b1.dr))) != 0)
1441 return comp_res;
1442 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1443 DR_INIT (b1.dr))) != 0)
1444 return comp_res;
1445 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1446 DR_OFFSET (b2.dr))) != 0)
1447 return comp_res;
1448 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1449 DR_INIT (b2.dr))) != 0)
1450 return comp_res;
1451
1452 return 0;
1453 }
1454
1455 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1456
1457 static void
1458 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1459 {
1460 dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1461 DR_REF (alias_pair->first.dr),
1462 DR_REF (alias_pair->second.dr));
1463
1464 dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1465 alias_pair->first.seg_len);
1466 if (!operand_equal_p (alias_pair->first.seg_len,
1467 alias_pair->second.seg_len, 0))
1468 dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1469
1470 dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1471 dump_dec (MSG_NOTE, alias_pair->first.access_size);
1472 if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1473 {
1474 dump_printf (MSG_NOTE, " vs. ");
1475 dump_dec (MSG_NOTE, alias_pair->second.access_size);
1476 }
1477
1478 dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1479 alias_pair->first.align);
1480 if (alias_pair->first.align != alias_pair->second.align)
1481 dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1482
1483 dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1484 if (alias_pair->flags & DR_ALIAS_RAW)
1485 dump_printf (MSG_NOTE, " RAW");
1486 if (alias_pair->flags & DR_ALIAS_WAR)
1487 dump_printf (MSG_NOTE, " WAR");
1488 if (alias_pair->flags & DR_ALIAS_WAW)
1489 dump_printf (MSG_NOTE, " WAW");
1490 if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1491 dump_printf (MSG_NOTE, " ARBITRARY");
1492 if (alias_pair->flags & DR_ALIAS_SWAPPED)
1493 dump_printf (MSG_NOTE, " SWAPPED");
1494 if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1495 dump_printf (MSG_NOTE, " UNSWAPPED");
1496 if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1497 dump_printf (MSG_NOTE, " MIXED_STEPS");
1498 if (alias_pair->flags == 0)
1499 dump_printf (MSG_NOTE, " <none>");
1500 dump_printf (MSG_NOTE, "\n");
1501 }
1502
1503 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1504 FACTOR is number of iterations that each data reference is accessed.
1505
1506 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1507 we create an expression:
1508
1509 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1510 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1511
1512 for aliasing checks. However, in some cases we can decrease the number
1513 of checks by combining two checks into one. For example, suppose we have
1514 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1515 condition is satisfied:
1516
1517 load_ptr_0 < load_ptr_1 &&
1518 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1519
1520 (this condition means, in each iteration of vectorized loop, the accessed
1521 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1522 load_ptr_1.)
1523
1524 we then can use only the following expression to finish the alising checks
1525 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1526
1527 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1528 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1529
1530 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1531 basic address. */
1532
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  /* Record the swap so the original order can be restored (or the
	     pair marked as arbitrary) in the final loop below.  */
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[i - 1];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  alias_pairs->ordered_remove (i--);
	  continue;
	}

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.
	     (Swapping the local pointers only; the pairs themselves are
	     not modified here.)  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      /* A changed length can reduce the provable alignment.  */
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  alias_pairs->ordered_remove (i);
	  i--;
	}
    }

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	/* Both flags set: swapped and unswapped pairs were merged, so
	   the original order (and hence RAW/WAR/WAW) is unknown.  */
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, " ");
    }
}
1721
1722 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1723 free of aliases, using a condition based on index values instead
1724 of a condition based on addresses. Return true on success,
1725 storing the condition in *COND_EXPR.
1726
1727 This can only be done if the two data references in ALIAS_PAIR access
1728 the same array object and the index is the only difference. For example,
1729 if the two data references are DR_A and DR_B:
1730
1731 DR_A DR_B
1732 data-ref arr[i] arr[j]
1733 base_object arr arr
1734 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1735
1736 The addresses and their index are like:
1737
1738 |<- ADDR_A ->| |<- ADDR_B ->|
1739 ------------------------------------------------------->
1740 | | | | | | | | | |
1741 ------------------------------------------------------->
1742 i_0 ... i_0+4 j_0 ... j_0+4
1743
1744 We can create expression based on index rather than address:
1745
1746 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1747
1748 Note evolution step of index needs to be considered in comparison. */
1749
1750 static bool
1751 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1752 const dr_with_seg_len_pair_t &alias_pair)
1753 {
1754 const dr_with_seg_len &dr_a = alias_pair.first;
1755 const dr_with_seg_len &dr_b = alias_pair.second;
1756 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
1757 || integer_zerop (DR_STEP (dr_a.dr))
1758 || integer_zerop (DR_STEP (dr_b.dr))
1759 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1760 return false;
1761
1762 poly_uint64 seg_len1, seg_len2;
1763 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1764 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1765 return false;
1766
1767 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1768 return false;
1769
1770 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1771 return false;
1772
1773 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1774 return false;
1775
1776 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1777
1778 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1779 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1780 if (neg_step)
1781 {
1782 abs_step = -abs_step;
1783 seg_len1 = -seg_len1;
1784 seg_len2 = -seg_len2;
1785 }
1786 else
1787 {
1788 /* Include the access size in the length, so that we only have one
1789 tree addition below. */
1790 seg_len1 += dr_a.access_size;
1791 seg_len2 += dr_b.access_size;
1792 }
1793
1794 /* Infer the number of iterations with which the memory segment is accessed
1795 by DR. In other words, alias is checked if memory segment accessed by
1796 DR_A in some iterations intersect with memory segment accessed by DR_B
1797 in the same amount iterations.
1798 Note segnment length is a linear function of number of iterations with
1799 DR_STEP as the coefficient. */
1800 poly_uint64 niter_len1, niter_len2;
1801 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1802 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1803 return false;
1804
1805 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1806 if (neg_step)
1807 {
1808 /* Divide each access size by the byte step, rounding up. */
1809 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1810 abs_step, &niter_access1)
1811 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1812 abs_step, &niter_access2))
1813 return false;
1814 }
1815
1816 unsigned int i;
1817 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1818 {
1819 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1820 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1821 /* Two indices must be the same if they are not scev, or not scev wrto
1822 current loop being vecorized. */
1823 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1824 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1825 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1826 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1827 {
1828 if (operand_equal_p (access1, access2, 0))
1829 continue;
1830
1831 return false;
1832 }
1833 /* The two indices must have the same step. */
1834 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1835 return false;
1836
1837 tree idx_step = CHREC_RIGHT (access1);
1838 /* Index must have const step, otherwise DR_STEP won't be constant. */
1839 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1840 /* Index must evaluate in the same direction as DR. */
1841 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1842
1843 tree min1 = CHREC_LEFT (access1);
1844 tree min2 = CHREC_LEFT (access2);
1845 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1846 return false;
1847
1848 /* Ideally, alias can be checked against loop's control IV, but we
1849 need to prove linear mapping between control IV and reference
1850 index. Although that should be true, we check against (array)
1851 index of data reference. Like segment length, index length is
1852 linear function of the number of iterations with index_step as
1853 the coefficient, i.e, niter_len * idx_step. */
1854 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1855 build_int_cst (TREE_TYPE (min1),
1856 niter_len1));
1857 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1858 build_int_cst (TREE_TYPE (min2),
1859 niter_len2));
1860 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1861 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1862 /* Adjust ranges for negative step. */
1863 if (neg_step)
1864 {
1865 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1866 std::swap (min1, max1);
1867 std::swap (min2, max2);
1868
1869 /* As with the lengths just calculated, we've measured the access
1870 sizes in iterations, so multiply them by the index step. */
1871 tree idx_access1
1872 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1873 build_int_cst (TREE_TYPE (min1), niter_access1));
1874 tree idx_access2
1875 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1876 build_int_cst (TREE_TYPE (min2), niter_access2));
1877
1878 /* MINUS_EXPR because the above values are negative. */
1879 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1880 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1881 }
1882 tree part_cond_expr
1883 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1884 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1885 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1886 if (*cond_expr)
1887 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1888 *cond_expr, part_cond_expr);
1889 else
1890 *cond_expr = part_cond_expr;
1891 }
1892 if (dump_enabled_p ())
1893 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
1894 return true;
1895 }
1896
1897 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1898 every address ADDR accessed by D:
1899
1900 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1901
1902 In this case, every element accessed by D is aligned to at least
1903 ALIGN bytes.
1904
1905 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1906
1907 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1908
1909 static void
1910 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1911 tree *seg_max_out, HOST_WIDE_INT align)
1912 {
1913 /* Each access has the following pattern:
1914
1915 <- |seg_len| ->
1916 <--- A: -ve step --->
1917 +-----+-------+-----+-------+-----+
1918 | n-1 | ,.... | 0 | ..... | n-1 |
1919 +-----+-------+-----+-------+-----+
1920 <--- B: +ve step --->
1921 <- |seg_len| ->
1922 |
1923 base address
1924
1925 where "n" is the number of scalar iterations covered by the segment.
1926 (This should be VF for a particular pair if we know that both steps
1927 are the same, otherwise it will be the full number of scalar loop
1928 iterations.)
1929
1930 A is the range of bytes accessed when the step is negative,
1931 B is the range when the step is positive.
1932
1933 If the access size is "access_size" bytes, the lowest addressed byte is:
1934
1935 base + (step < 0 ? seg_len : 0) [LB]
1936
1937 and the highest addressed byte is always below:
1938
1939 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1940
1941 Thus:
1942
1943 LB <= ADDR < UB
1944
1945 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1946 bytes, so:
1947
1948 LB <= ADDR <= UB - ALIGN
1949
1950 where "- ALIGN" folds naturally with the "+ access_size" and often
1951 cancels it out.
1952
1953 We don't try to simplify LB and UB beyond this (e.g. by using
1954 MIN and MAX based on whether seg_len rather than the stride is
1955 negative) because it is possible for the absolute size of the
1956 segment to overflow the range of a ssize_t.
1957
1958 Keeping the pointer_plus outside of the cond_expr should allow
1959 the cond_exprs to be shared with other alias checks. */
1960 tree indicator = dr_direction_indicator (d.dr);
1961 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1962 fold_convert (ssizetype, indicator),
1963 ssize_int (0));
1964 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1965 DR_OFFSET (d.dr));
1966 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1967 tree seg_len
1968 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1969
1970 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1971 seg_len, size_zero_node);
1972 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1973 size_zero_node, seg_len);
1974 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1975 size_int (d.access_size - align));
1976
1977 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1978 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1979 }
1980
1981 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
1982 storing the condition in *COND_EXPR. The fallback is to generate a
1983 a test that the two accesses do not overlap:
1984
1985 end_a <= start_b || end_b <= start_a. */
1986
1987 static void
1988 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1989 const dr_with_seg_len_pair_t &alias_pair)
1990 {
1991 const dr_with_seg_len& dr_a = alias_pair.first;
1992 const dr_with_seg_len& dr_b = alias_pair.second;
1993 *cond_expr = NULL_TREE;
1994 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
1995 return;
1996
1997 unsigned HOST_WIDE_INT min_align;
1998 tree_code cmp_code;
1999 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2000 are equivalent. This is just an optimization heuristic. */
2001 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2002 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2003 {
2004 /* In this case adding access_size to seg_len is likely to give
2005 a simple X * step, where X is either the number of scalar
2006 iterations or the vectorization factor. We're better off
2007 keeping that, rather than subtracting an alignment from it.
2008
2009 In this case the maximum values are exclusive and so there is
2010 no alias if the maximum of one segment equals the minimum
2011 of another. */
2012 min_align = 0;
2013 cmp_code = LE_EXPR;
2014 }
2015 else
2016 {
2017 /* Calculate the minimum alignment shared by all four pointers,
2018 then arrange for this alignment to be subtracted from the
2019 exclusive maximum values to get inclusive maximum values.
2020 This "- min_align" is cumulative with a "+ access_size"
2021 in the calculation of the maximum values. In the best
2022 (and common) case, the two cancel each other out, leaving
2023 us with an inclusive bound based only on seg_len. In the
2024 worst case we're simply adding a smaller number than before.
2025
2026 Because the maximum values are inclusive, there is an alias
2027 if the maximum value of one segment is equal to the minimum
2028 value of the other. */
2029 min_align = MIN (dr_a.align, dr_b.align);
2030 cmp_code = LT_EXPR;
2031 }
2032
2033 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2034 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2035 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2036
2037 *cond_expr
2038 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2039 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2040 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2041 if (dump_enabled_p ())
2042 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2043 }
2044
2045 /* Create a conditional expression that represents the run-time checks for
2046 overlapping of address ranges represented by a list of data references
2047 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2048 COND_EXPR is the conditional expression to be used in the if statement
2049 that controls which version of the loop gets executed at runtime. */
2050
2051 void
2052 create_runtime_alias_checks (class loop *loop,
2053 vec<dr_with_seg_len_pair_t> *alias_pairs,
2054 tree * cond_expr)
2055 {
2056 tree part_cond_expr;
2057
2058 fold_defer_overflow_warnings ();
2059 dr_with_seg_len_pair_t *alias_pair;
2060 unsigned int i;
2061 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
2062 {
2063 gcc_assert (alias_pair->flags);
2064 if (dump_enabled_p ())
2065 dump_printf (MSG_NOTE,
2066 "create runtime check for data references %T and %T\n",
2067 DR_REF (alias_pair->first.dr),
2068 DR_REF (alias_pair->second.dr));
2069
2070 /* Create condition expression for each pair data references. */
2071 create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
2072 if (*cond_expr)
2073 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2074 *cond_expr, part_cond_expr);
2075 else
2076 *cond_expr = part_cond_expr;
2077 }
2078 fold_undefer_and_ignore_overflow_warnings ();
2079 }
2080
2081 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2082 expressions. */
2083 static bool
2084 dr_equal_offsets_p1 (tree offset1, tree offset2)
2085 {
2086 bool res;
2087
2088 STRIP_NOPS (offset1);
2089 STRIP_NOPS (offset2);
2090
2091 if (offset1 == offset2)
2092 return true;
2093
2094 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2095 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2096 return false;
2097
2098 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2099 TREE_OPERAND (offset2, 0));
2100
2101 if (!res || !BINARY_CLASS_P (offset1))
2102 return res;
2103
2104 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2105 TREE_OPERAND (offset2, 1));
2106
2107 return res;
2108 }
2109
2110 /* Check if DRA and DRB have equal offsets. */
2111 bool
2112 dr_equal_offsets_p (struct data_reference *dra,
2113 struct data_reference *drb)
2114 {
2115 tree offset1, offset2;
2116
2117 offset1 = DR_OFFSET (dra);
2118 offset2 = DR_OFFSET (drb);
2119
2120 return dr_equal_offsets_p1 (offset1, offset2);
2121 }
2122
2123 /* Returns true if FNA == FNB. */
2124
2125 static bool
2126 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2127 {
2128 unsigned i, n = fna.length ();
2129
2130 if (n != fnb.length ())
2131 return false;
2132
2133 for (i = 0; i < n; i++)
2134 if (!operand_equal_p (fna[i], fnb[i], 0))
2135 return false;
2136
2137 return true;
2138 }
2139
2140 /* If all the functions in CF are the same, returns one of them,
2141 otherwise returns NULL. */
2142
2143 static affine_fn
2144 common_affine_function (conflict_function *cf)
2145 {
2146 unsigned i;
2147 affine_fn comm;
2148
2149 if (!CF_NONTRIVIAL_P (cf))
2150 return affine_fn ();
2151
2152 comm = cf->fns[0];
2153
2154 for (i = 1; i < cf->n; i++)
2155 if (!affine_function_equal_p (comm, cf->fns[i]))
2156 return affine_fn ();
2157
2158 return comm;
2159 }
2160
2161 /* Returns the base of the affine function FN. */
2162
static tree
affine_function_base (affine_fn fn)
{
  /* The constant term lives at index 0; coefficients follow.  */
  return fn[0];
}
2168
2169 /* Returns true if FN is a constant. */
2170
2171 static bool
2172 affine_function_constant_p (affine_fn fn)
2173 {
2174 unsigned i;
2175 tree coef;
2176
2177 for (i = 1; fn.iterate (i, &coef); i++)
2178 if (!integer_zerop (coef))
2179 return false;
2180
2181 return true;
2182 }
2183
2184 /* Returns true if FN is the zero constant function. */
2185
2186 static bool
2187 affine_function_zero_p (affine_fn fn)
2188 {
2189 return (integer_zerop (affine_function_base (fn))
2190 && affine_function_constant_p (fn));
2191 }
2192
2193 /* Returns a signed integer type with the largest precision from TA
2194 and TB. */
2195
2196 static tree
2197 signed_type_for_types (tree ta, tree tb)
2198 {
2199 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2200 return signed_type_for (ta);
2201 else
2202 return signed_type_for (tb);
2203 }
2204
2205 /* Applies operation OP on affine functions FNA and FNB, and returns the
2206 result. */
2207
2208 static affine_fn
2209 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2210 {
2211 unsigned i, n, m;
2212 affine_fn ret;
2213 tree coef;
2214
2215 if (fnb.length () > fna.length ())
2216 {
2217 n = fna.length ();
2218 m = fnb.length ();
2219 }
2220 else
2221 {
2222 n = fnb.length ();
2223 m = fna.length ();
2224 }
2225
2226 ret.create (m);
2227 for (i = 0; i < n; i++)
2228 {
2229 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2230 TREE_TYPE (fnb[i]));
2231 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2232 }
2233
2234 for (; fna.iterate (i, &coef); i++)
2235 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2236 coef, integer_zero_node));
2237 for (; fnb.iterate (i, &coef); i++)
2238 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2239 integer_zero_node, coef));
2240
2241 return ret;
2242 }
2243
2244 /* Returns the sum of affine functions FNA and FNB. */
2245
static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  /* Element-wise addition; the result is a freshly allocated vector.  */
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
2251
2252 /* Returns the difference of affine functions FNA and FNB. */
2253
static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  /* Element-wise subtraction; the result is a freshly allocated vector.  */
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2259
2260 /* Frees affine function FN. */
2261
static void
affine_fn_free (affine_fn fn)
{
  /* Release the vector's heap storage.  */
  fn.release ();
}
2267
2268 /* Determine for each subscript in the data dependence relation DDR
2269 the distance. */
2270
2271 static void
2272 compute_subscript_distance (struct data_dependence_relation *ddr)
2273 {
2274 conflict_function *cf_a, *cf_b;
2275 affine_fn fn_a, fn_b, diff;
2276
2277 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2278 {
2279 unsigned int i;
2280
2281 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2282 {
2283 struct subscript *subscript;
2284
2285 subscript = DDR_SUBSCRIPT (ddr, i);
2286 cf_a = SUB_CONFLICTS_IN_A (subscript);
2287 cf_b = SUB_CONFLICTS_IN_B (subscript);
2288
2289 fn_a = common_affine_function (cf_a);
2290 fn_b = common_affine_function (cf_b);
2291 if (!fn_a.exists () || !fn_b.exists ())
2292 {
2293 SUB_DISTANCE (subscript) = chrec_dont_know;
2294 return;
2295 }
2296 diff = affine_fn_minus (fn_a, fn_b);
2297
2298 if (affine_function_constant_p (diff))
2299 SUB_DISTANCE (subscript) = affine_function_base (diff);
2300 else
2301 SUB_DISTANCE (subscript) = chrec_dont_know;
2302
2303 affine_fn_free (diff);
2304 }
2305 }
2306 }
2307
2308 /* Returns the conflict function for "unknown". */
2309
static conflict_function *
conflict_fn_not_known (void)
{
  /* XCNEW zero-initializes, so no affine functions are attached.  */
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}
2318
2319 /* Returns the conflict function for "independent". */
2320
static conflict_function *
conflict_fn_no_dependence (void)
{
  /* XCNEW zero-initializes, so no affine functions are attached.  */
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
2329
2330 /* Returns true if the address of OBJ is invariant in LOOP. */
2331
2332 static bool
2333 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2334 {
2335 while (handled_component_p (obj))
2336 {
2337 if (TREE_CODE (obj) == ARRAY_REF)
2338 {
2339 for (int i = 1; i < 4; ++i)
2340 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2341 loop->num))
2342 return false;
2343 }
2344 else if (TREE_CODE (obj) == COMPONENT_REF)
2345 {
2346 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2347 loop->num))
2348 return false;
2349 }
2350 obj = TREE_OPERAND (obj, 0);
2351 }
2352
2353 if (!INDIRECT_REF_P (obj)
2354 && TREE_CODE (obj) != MEM_REF)
2355 return true;
2356
2357 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2358 loop->num);
2359 }
2360
2361 /* Returns false if we can prove that data references A and B do not alias,
2362 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2363 considered. */
2364
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Form OFF2 - OFF1 and test whether the two access ranges can
	 possibly overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Two MEM_REFs in the same dependence clique but with different
     dependence bases are known not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Fall back to a points-to query on the two pointers; take the
	 address of ADDR_B when it is not itself pointer-based.  */
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Mirror image of the case above with the roles swapped.  */
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch on the kind of
     dependence (output / anti / other) for the most precise oracle.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2447
2448 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2449 if it is meaningful to compare their associated access functions
2450 when checking for dependencies. */
2451
2452 static bool
2453 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2454 {
2455 /* Allow pairs of component refs from the following sets:
2456
2457 { REALPART_EXPR, IMAGPART_EXPR }
2458 { COMPONENT_REF }
2459 { ARRAY_REF }. */
2460 tree_code code_a = TREE_CODE (ref_a);
2461 tree_code code_b = TREE_CODE (ref_b);
2462 if (code_a == IMAGPART_EXPR)
2463 code_a = REALPART_EXPR;
2464 if (code_b == IMAGPART_EXPR)
2465 code_b = REALPART_EXPR;
2466 if (code_a != code_b)
2467 return false;
2468
2469 if (TREE_CODE (ref_a) == COMPONENT_REF)
2470 /* ??? We cannot simply use the type of operand #0 of the refs here as
2471 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2472 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2473 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2474 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2475
2476 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2477 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2478 }
2479
2480 /* Initialize a data dependence relation between data accesses A and
2481 B. NB_LOOPS is the number of loops surrounding the references: the
2482 size of the classic distance/direction vectors. */
2483
struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* Without both references there is nothing to analyze.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  /* References without access functions cannot be analyzed further.  */
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1  2  3
     A:  .d  .c .s [i]

	 0   1  2  3  4  5
     B:  __real .f [i] .e .s [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      /* Advance whichever side has the smaller (or equal) enclosing
	 type, so that the sizes converge; both advance on a tie.  */
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      /* Only record the objects when their addresses are provably
	 invariant in the loop nest.  */
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per pair of access functions in the chosen
     sequence; the conflict functions start out as "not known".  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2801
2802 /* Frees memory used by the conflict function F. */
2803
2804 static void
2805 free_conflict_function (conflict_function *f)
2806 {
2807 unsigned i;
2808
2809 if (CF_NONTRIVIAL_P (f))
2810 {
2811 for (i = 0; i < f->n; i++)
2812 affine_fn_free (f->fns[i]);
2813 }
2814 free (f);
2815 }
2816
2817 /* Frees memory used by SUBSCRIPTS. */
2818
2819 static void
2820 free_subscripts (vec<subscript_p> subscripts)
2821 {
2822 unsigned i;
2823 subscript_p s;
2824
2825 FOR_EACH_VEC_ELT (subscripts, i, s)
2826 {
2827 free_conflict_function (s->conflicting_iterations_in_a);
2828 free_conflict_function (s->conflicting_iterations_in_b);
2829 free (s);
2830 }
2831 subscripts.release ();
2832 }
2833
2834 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2835 description. */
2836
static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  /* The subscripts are no longer needed once the relation is final;
     release them and leave an empty vector behind.  */
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
2845
2846 /* The dependence relation DDR cannot be represented by a distance
2847 vector. */
2848
static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  /* Mark the relation as non-affine.  */
  DDR_AFFINE_P (ddr) = false;
}
2857
2858 \f
2859
2860 /* This section contains the classic Banerjee tests. */
2861
2862 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2863 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2864
static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  /* ZIV: neither access function varies with any loop index.  */
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}
2871
2872 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2873 variable, i.e., if the SIV (Single Index Variable) test is true. */
2874
2875 static bool
2876 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2877 {
2878 if ((evolution_function_is_constant_p (chrec_a)
2879 && evolution_function_is_univariate_p (chrec_b))
2880 || (evolution_function_is_constant_p (chrec_b)
2881 && evolution_function_is_univariate_p (chrec_a)))
2882 return true;
2883
2884 if (evolution_function_is_univariate_p (chrec_a)
2885 && evolution_function_is_univariate_p (chrec_b))
2886 {
2887 switch (TREE_CODE (chrec_a))
2888 {
2889 case POLYNOMIAL_CHREC:
2890 switch (TREE_CODE (chrec_b))
2891 {
2892 case POLYNOMIAL_CHREC:
2893 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2894 return false;
2895 /* FALLTHRU */
2896
2897 default:
2898 return true;
2899 }
2900
2901 default:
2902 return true;
2903 }
2904 }
2905
2906 return false;
2907 }
2908
2909 /* Creates a conflict function with N dimensions. The affine functions
2910 in each dimension follow. */
2911
2912 static conflict_function *
2913 conflict_fn (unsigned n, ...)
2914 {
2915 unsigned i;
2916 conflict_function *ret = XCNEW (conflict_function);
2917 va_list ap;
2918
2919 gcc_assert (n > 0 && n <= MAX_DIM);
2920 va_start (ap, n);
2921
2922 ret->n = n;
2923 for (i = 0; i < n; i++)
2924 ret->fns[i] = va_arg (ap, affine_fn);
2925 va_end (ap);
2926
2927 return ret;
2928 }
2929
2930 /* Returns constant affine function with value CST. */
2931
static affine_fn
affine_fn_cst (tree cst)
{
  /* A single element: the constant term, with no coefficients.  */
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}
2940
2941 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2942
2943 static affine_fn
2944 affine_fn_univar (tree cst, unsigned dim, tree coef)
2945 {
2946 affine_fn fn;
2947 fn.create (dim + 1);
2948 unsigned i;
2949
2950 gcc_assert (dim > 0);
2951 fn.quick_push (cst);
2952 for (i = 1; i < dim; i++)
2953 fn.quick_push (integer_zero_node);
2954 fn.quick_push (coef);
2955 return fn;
2956 }
2957
2958 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2959 *OVERLAPS_B are initialized to the functions that describe the
2960 relation between the elements accessed twice by CHREC_A and
2961 CHREC_B. For k >= 0, the following property is verified:
2962
2963 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2964
2965 static void
2966 analyze_ziv_subscript (tree chrec_a,
2967 tree chrec_b,
2968 conflict_function **overlaps_a,
2969 conflict_function **overlaps_b,
2970 tree *last_conflicts)
2971 {
2972 tree type, difference;
2973 dependence_stats.num_ziv++;
2974
2975 if (dump_file && (dump_flags & TDF_DETAILS))
2976 fprintf (dump_file, "(analyze_ziv_subscript \n");
2977
2978 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2979 chrec_a = chrec_convert (type, chrec_a, NULL);
2980 chrec_b = chrec_convert (type, chrec_b, NULL);
2981 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2982
2983 switch (TREE_CODE (difference))
2984 {
2985 case INTEGER_CST:
2986 if (integer_zerop (difference))
2987 {
2988 /* The difference is equal to zero: the accessed index
2989 overlaps for each iteration in the loop. */
2990 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2991 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2992 *last_conflicts = chrec_dont_know;
2993 dependence_stats.num_ziv_dependent++;
2994 }
2995 else
2996 {
2997 /* The accesses do not overlap. */
2998 *overlaps_a = conflict_fn_no_dependence ();
2999 *overlaps_b = conflict_fn_no_dependence ();
3000 *last_conflicts = integer_zero_node;
3001 dependence_stats.num_ziv_independent++;
3002 }
3003 break;
3004
3005 default:
3006 /* We're not sure whether the indexes overlap. For the moment,
3007 conservatively answer "don't know". */
3008 if (dump_file && (dump_flags & TDF_DETAILS))
3009 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3010
3011 *overlaps_a = conflict_fn_not_known ();
3012 *overlaps_b = conflict_fn_not_known ();
3013 *last_conflicts = chrec_dont_know;
3014 dependence_stats.num_ziv_unimplemented++;
3015 break;
3016 }
3017
3018 if (dump_file && (dump_flags & TDF_DETAILS))
3019 fprintf (dump_file, ")\n");
3020 }
3021
3022 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3023 and only if it fits to the int type. If this is not the case, or the
3024 bound on the number of iterations of LOOP could not be derived, returns
3025 chrec_dont_know. */
3026
3027 static tree
3028 max_stmt_executions_tree (class loop *loop)
3029 {
3030 widest_int nit;
3031
3032 if (!max_stmt_executions (loop, &nit))
3033 return chrec_dont_know;
3034
3035 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3036 return chrec_dont_know;
3037
3038 return wide_int_to_tree (unsigned_type_node, nit);
3039 }
3040
3041 /* Determine whether the CHREC is always positive/negative. If the expression
3042 cannot be statically analyzed, return false, otherwise set the answer into
3043 VALUE. */
3044
3045 static bool
3046 chrec_is_positive (tree chrec, bool *value)
3047 {
3048 bool value0, value1, value2;
3049 tree end_value, nb_iter;
3050
3051 switch (TREE_CODE (chrec))
3052 {
3053 case POLYNOMIAL_CHREC:
3054 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3055 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3056 return false;
3057
3058 /* FIXME -- overflows. */
3059 if (value0 == value1)
3060 {
3061 *value = value0;
3062 return true;
3063 }
3064
3065 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3066 and the proof consists in showing that the sign never
3067 changes during the execution of the loop, from 0 to
3068 loop->nb_iterations. */
3069 if (!evolution_function_is_affine_p (chrec))
3070 return false;
3071
3072 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3073 if (chrec_contains_undetermined (nb_iter))
3074 return false;
3075
3076 #if 0
3077 /* TODO -- If the test is after the exit, we may decrease the number of
3078 iterations by one. */
3079 if (after_exit)
3080 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3081 #endif
3082
3083 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3084
3085 if (!chrec_is_positive (end_value, &value2))
3086 return false;
3087
3088 *value = value0;
3089 return value0 == value1;
3090
3091 case INTEGER_CST:
3092 switch (tree_int_cst_sgn (chrec))
3093 {
3094 case -1:
3095 *value = false;
3096 break;
3097 case 1:
3098 *value = true;
3099 break;
3100 default:
3101 return false;
3102 }
3103 return true;
3104
3105 default:
3106 return false;
3107 }
3108 }
3109
3110
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a common signed type so that the subtraction below is
     meaningful for both access functions.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0: sign of (base of CHREC_B) - CHREC_A.  If unknown, answer
     conservatively "don't know".  */
  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A: an overlap requires the step
	     of CHREC_B (VALUE1) to be positive.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      /* The conflicting iteration of CHREC_B is
			 |difference| / step.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  /* The conflicting iteration is past the end of
			     the loop: release the tentative results and
			     report independence instead.  */
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A: an overlap requires the step
	     of CHREC_B (VALUE2) to be negative.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  /* Conflicting iteration lies past the end of
			     the loop: independent.  */
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3318
3319 /* Helper recursive function for initializing the matrix A. Returns
3320 the initial value of CHREC. */
3321
3322 static tree
3323 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3324 {
3325 gcc_assert (chrec);
3326
3327 switch (TREE_CODE (chrec))
3328 {
3329 case POLYNOMIAL_CHREC:
3330 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3331 return chrec_dont_know;
3332 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3333 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3334
3335 case PLUS_EXPR:
3336 case MULT_EXPR:
3337 case MINUS_EXPR:
3338 {
3339 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3340 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3341
3342 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3343 }
3344
3345 CASE_CONVERT:
3346 {
3347 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3348 return chrec_convert (chrec_type (chrec), op, NULL);
3349 }
3350
3351 case BIT_NOT_EXPR:
3352 {
3353 /* Handle ~X as -1 - X. */
3354 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3355 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3356 build_int_cst (TREE_TYPE (chrec), -1), op);
3357 }
3358
3359 case INTEGER_CST:
3360 return chrec;
3361
3362 default:
3363 gcc_unreachable ();
3364 return NULL_TREE;
3365 }
3366 }
3367
3368 #define FLOOR_DIV(x,y) ((x) / (y))
3369
3370 /* Solves the special case of the Diophantine equation:
3371 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3372
3373 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3374 number of iterations that loops X and Y run. The overlaps will be
3375 constructed as evolutions in dimension DIM. */
3376
3377 static void
3378 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3379 HOST_WIDE_INT step_a,
3380 HOST_WIDE_INT step_b,
3381 affine_fn *overlaps_a,
3382 affine_fn *overlaps_b,
3383 tree *last_conflicts, int dim)
3384 {
3385 if (((step_a > 0 && step_b > 0)
3386 || (step_a < 0 && step_b < 0)))
3387 {
3388 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3389 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3390
3391 gcd_steps_a_b = gcd (step_a, step_b);
3392 step_overlaps_a = step_b / gcd_steps_a_b;
3393 step_overlaps_b = step_a / gcd_steps_a_b;
3394
3395 if (niter > 0)
3396 {
3397 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3398 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3399 last_conflict = tau2;
3400 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3401 }
3402 else
3403 *last_conflicts = chrec_dont_know;
3404
3405 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3406 build_int_cst (NULL_TREE,
3407 step_overlaps_a));
3408 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3409 build_int_cst (NULL_TREE,
3410 step_overlaps_b));
3411 }
3412
3413 else
3414 {
3415 *overlaps_a = affine_fn_cst (integer_zero_node);
3416 *overlaps_b = affine_fn_cst (integer_zero_node);
3417 *last_conflicts = integer_zero_node;
3418 }
3419 }
3420
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer loop of the bivariate CHREC_A, Y its inner loop,
     and Z the loop of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three iteration counts are needed below; give up if any is
     unknown (max_stmt_executions_int returns a negative value).  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three pairwise sub-problems: x vs z, y vs z, and the
     combined (x + y) vs z, each in its own dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Accumulate the contributing sub-solutions; the old affine_fn
	 is freed after each affine_fn_plus, which returns a fresh
	 function.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* Ownership of OVA1, OVA2 and OVB transfers to the conflict
	 functions.  */
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied by affine_fn_plus above; release
     the originals.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3549
3550 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3551
3552 static void
3553 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3554 int size)
3555 {
3556 memcpy (vec2, vec1, size * sizeof (*vec1));
3557 }
3558
3559 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3560
3561 static void
3562 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3563 int m, int n)
3564 {
3565 int i;
3566
3567 for (i = 0; i < m; i++)
3568 lambda_vector_copy (mat1[i], mat2[i], n);
3569 }
3570
3571 /* Store the N x N identity matrix in MAT. */
3572
3573 static void
3574 lambda_matrix_id (lambda_matrix mat, int size)
3575 {
3576 int i, j;
3577
3578 for (i = 0; i < size; i++)
3579 for (j = 0; j < size; j++)
3580 mat[i][j] = (i == j) ? 1 : 0;
3581 }
3582
3583 /* Return the index of the first nonzero element of vector VEC1 between
3584 START and N. We must have START <= N.
3585 Returns N if VEC1 is the zero vector. */
3586
3587 static int
3588 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3589 {
3590 int j = start;
3591 while (j < n && vec1[j] == 0)
3592 j++;
3593 return j;
3594 }
3595
3596 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3597 R2 = R2 + CONST1 * R1. */
3598
3599 static void
3600 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3601 lambda_int const1)
3602 {
3603 int i;
3604
3605 if (const1 == 0)
3606 return;
3607
3608 for (i = 0; i < n; i++)
3609 mat[r2][i] += const1 * mat[r1][i];
3610 }
3611
3612 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3613 and store the result in VEC2. */
3614
3615 static void
3616 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3617 int size, lambda_int const1)
3618 {
3619 int i;
3620
3621 if (const1 == 0)
3622 lambda_vector_clear (vec2, size);
3623 else
3624 for (i = 0; i < size; i++)
3625 vec2[i] = const1 * vec1[i];
3626 }
3627
3628 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3629
3630 static void
3631 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3632 int size)
3633 {
3634 lambda_vector_mult_const (vec1, vec2, size, -1);
3635 }
3636
3637 /* Negate row R1 of matrix MAT which has N columns. */
3638
3639 static void
3640 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3641 {
3642 lambda_vector_negate (mat[r1], mat[r1], n);
3643 }
3644
3645 /* Return true if two vectors are equal. */
3646
3647 static bool
3648 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3649 {
3650 int i;
3651 for (i = 0; i < size; i++)
3652 if (vec1[i] != vec2[i])
3653 return false;
3654 return true;
3655 }
3656
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both so that U.A = S stays invariant.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate entries of column J below the pivot, bottom up.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      /* Euclidean-style reduction of S[i][j] against S[i-1][j].  */
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* SIGMA carries the sign so that FACTOR is a signed
		     quotient of the absolute values.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  /* Subtract FACTOR times row I from row I-1 and swap
		     the rows; mirror both operations on U.  */
		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
3701
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* The matrices are allocated on a scratch obstack released at
     end_analyze_subs_aa on every exit path.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* Fill A with the steps of both chrecs (CHREC_B negated); the
     returned values are the initial conditions.  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the constant part of the equation: the difference of
     the initial conditions.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the pivot (the gcd of the steps) is positive.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The overlaps are the arithmetic progressions starting
		 at the first conflict with the rows of U as steps.  */
	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3991
3992 /* Returns true when analyze_subscript_affine_affine can be used for
3993 determining the dependence relation between chrec_a and chrec_b,
3994 that contain symbols. This function modifies chrec_a and chrec_b
3995 such that the analysis result is the same, and such that they don't
3996 contain symbols, and then can safely be passed to the analyzer.
3997
3998 Example: The analysis of the following tuples of evolutions produce
3999 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4000 vs. {0, +, 1}_1
4001
4002 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4003 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4004 */
4005
4006 static bool
4007 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4008 {
4009 tree diff, type, left_a, left_b, right_b;
4010
4011 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4012 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4013 /* FIXME: For the moment not handled. Might be refined later. */
4014 return false;
4015
4016 type = chrec_type (*chrec_a);
4017 left_a = CHREC_LEFT (*chrec_a);
4018 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4019 diff = chrec_fold_minus (type, left_a, left_b);
4020
4021 if (!evolution_function_is_constant_p (diff))
4022 return false;
4023
4024 if (dump_file && (dump_flags & TDF_DETAILS))
4025 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4026
4027 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4028 diff, CHREC_RIGHT (*chrec_a));
4029 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4030 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4031 build_int_cst (type, 0),
4032 right_b);
4033 return true;
4034 }
4035
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: handled by the specialized cst/affine test.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				     overlaps_a, overlaps_b, last_conflicts);

  /* Affine vs. constant: same test with the roles of A and B swapped.  */
  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				     overlaps_b, overlaps_a, last_conflicts);

  /* Affine vs. affine in the same loop.  */
  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs: try to rewrite them into equivalent
	 symbol-free chrecs first (modifies chrec_a/chrec_b in place).  */
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
      /* Conservative fallback, also reached via the goto above:
	 answer "don't know".  */
      siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4119
4120 /* Returns false if we can prove that the greatest common divisor of the steps
4121 of CHREC does not divide CST, false otherwise. */
4122
4123 static bool
4124 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4125 {
4126 HOST_WIDE_INT cd = 0, val;
4127 tree step;
4128
4129 if (!tree_fits_shwi_p (cst))
4130 return true;
4131 val = tree_to_shwi (cst);
4132
4133 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4134 {
4135 step = CHREC_RIGHT (chrec);
4136 if (!tree_fits_shwi_p (step))
4137 return true;
4138 cd = gcd (cd, tree_to_shwi (step));
4139 chrec = CHREC_LEFT (chrec);
4140 }
4141
4142 return val % cd == 0;
4143 }
4144
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Convert both chrecs to a common signed type so that their
     difference below is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  /* GCD test: a constant difference that the GCD of the steps provably
     does not divide means no element is ever accessed twice.  */
  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
	 */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4245
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Missing or undetermined access functions: nothing can be
     concluded, answer "don't know".  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the classic subscript classification: ZIV (zero index
     variables), SIV (single), then MIV (multiple) as the fallback.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4336
4337 /* Helper function for uniquely inserting distance vectors. */
4338
4339 static void
4340 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4341 {
4342 unsigned i;
4343 lambda_vector v;
4344
4345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4346 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4347 return;
4348
4349 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4350 }
4351
4352 /* Helper function for uniquely inserting direction vectors. */
4353
4354 static void
4355 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4356 {
4357 unsigned i;
4358 lambda_vector v;
4359
4360 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4361 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4362 return;
4363
4364 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4365 }
4366
4367 /* Add a distance of 1 on all the loops outer than INDEX. If we
4368 haven't yet determined a distance for this outer loop, push a new
4369 distance vector composed of the previous distance, and a distance
4370 of 1 for this outer loop. Example:
4371
4372 | loop_1
4373 | loop_2
4374 | A[10]
4375 | endloop_2
4376 | endloop_1
4377
4378 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4379 save (0, 1), then we have to save (1, 0). */
4380
4381 static void
4382 add_outer_distances (struct data_dependence_relation *ddr,
4383 lambda_vector dist_v, int index)
4384 {
4385 /* For each outer loop where init_v is not set, the accesses are
4386 in dependence of distance 1 in the loop. */
4387 while (--index >= 0)
4388 {
4389 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4390 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4391 save_v[index] = 1;
4392 save_dist_v (ddr, save_v);
4393 }
4394 }
4395
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* init_v[k] records whether dist_v[k] has already been set by an
     earlier subscript, for the coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop for a
	     per-loop distance to make sense.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4487
4488 /* Return true when the DDR contains only constant access functions. */
4489
4490 static bool
4491 constant_access_functions (const struct data_dependence_relation *ddr)
4492 {
4493 unsigned i;
4494 subscript *sub;
4495
4496 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4497 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4498 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4499 return false;
4500
4501 return true;
4502 }
4503
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a bivariate chrec
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two loop variables in the loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2): the
     same element is reached again after step_2/gcd iterations of loop 1
     and -step_1/gcd iterations of loop 2.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so that the leading component is positive.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
4551
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distance vectors implied by the
   loops that do not appear in any access function.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* The access functions of DDR_A and DDR_B are equal here, so it
	 is enough to look at side 0.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for a single
		 subscript.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* All loops outer than INDEX_CARRY get a self-distance of 1.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4611
4612 static void
4613 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4614 {
4615 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4616
4617 dist_v[0] = 1;
4618 save_dist_v (ddr, dist_v);
4619 }
4620
4621 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4622 is the case for example when access functions are the same and
4623 equal to a constant, as in:
4624
4625 | loop_1
4626 | A[3] = ...
4627 | ... = A[3]
4628 | endloop_1
4629
4630 in which case the distance vectors are (0) and (1). */
4631
4632 static void
4633 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4634 {
4635 unsigned i, j;
4636
4637 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4638 {
4639 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4640 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4641 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4642
4643 for (j = 0; j < ca->n; j++)
4644 if (affine_function_zero_p (ca->fns[j]))
4645 {
4646 insert_innermost_unit_dist_vector (ddr);
4647 return;
4648 }
4649
4650 for (j = 0; j < cb->n; j++)
4651 if (affine_function_zero_p (cb->fns[j]))
4652 {
4653 insert_innermost_unit_dist_vector (ddr);
4654 return;
4655 }
4656 }
4657 }
4658
4659 /* Return true when the DDR contains two data references that have the
4660 same access functions. */
4661
4662 static inline bool
4663 same_access_functions (const struct data_dependence_relation *ddr)
4664 {
4665 unsigned i;
4666 subscript *sub;
4667
4668 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4669 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4670 SUB_ACCESS_FN (sub, 1)))
4671 return false;
4672
4673 return true;
4674 }
4675
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  /* Self dependence (identical access functions on both sides).  */
  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the subscript tests with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also compute the opposite-direction vector for the
		 outer loops.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4826
4827 /* Return the direction for a given distance.
4828 FIXME: Computing dir this way is suboptimal, since dir can catch
4829 cases that dist is unable to represent. */
4830
4831 static inline enum data_dependence_direction
4832 dir_from_dist (int dist)
4833 {
4834 if (dist > 0)
4835 return dir_positive;
4836 else if (dist < 0)
4837 return dir_negative;
4838 else
4839 return dir_equal;
4840 }
4841
4842 /* Compute the classic per loop direction vector. DDR is the data
4843 dependence relation to build a vector from. */
4844
4845 static void
4846 build_classic_dir_vector (struct data_dependence_relation *ddr)
4847 {
4848 unsigned i, j;
4849 lambda_vector dist_v;
4850
4851 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4852 {
4853 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4854
4855 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4856 dir_v[j] = dir_from_dist (dist_v[j]);
4857
4858 save_dir_v (ddr, dir_v);
4859 }
4860 }
4861
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* RES stays NULL_TREE while every subscript analyzed so far shows a
     known dependence; chrec_dont_know records an undetermined
     subscript, chrec_known records proven independence.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release conflict functions from a previous analysis of this
	 subscript before overwriting them.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4923
4924 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4925
4926 static void
4927 subscript_dependence_tester (struct data_dependence_relation *ddr,
4928 class loop *loop_nest)
4929 {
4930 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4931 dependence_stats.num_dependence_dependent++;
4932
4933 compute_subscript_distance (ddr);
4934 if (build_classic_dist_vector (ddr, loop_nest))
4935 build_classic_dir_vector (ddr);
4936 }
4937
4938 /* Returns true when all the access functions of A are affine or
4939 constant with respect to LOOP_NEST. */
4940
4941 static bool
4942 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4943 const class loop *loop_nest)
4944 {
4945 unsigned int i;
4946 vec<tree> fns = DR_ACCESS_FNS (a);
4947 tree t;
4948
4949 FOR_EACH_VEC_ELT (fns, i, t)
4950 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4951 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4952 return false;
4953
4954 return true;
4955 }
4956
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester requires all access functions to be
	 affine or invariant in LOOP_NEST.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
5020
5021 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5022 the data references in DATAREFS, in the LOOP_NEST. When
5023 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5024 relations. Return true when successful, i.e. data references number
5025 is small enough to be handled. */
5026
5027 bool
5028 compute_all_dependences (vec<data_reference_p> datarefs,
5029 vec<ddr_p> *dependence_relations,
5030 vec<loop_p> loop_nest,
5031 bool compute_self_and_rr)
5032 {
5033 struct data_dependence_relation *ddr;
5034 struct data_reference *a, *b;
5035 unsigned int i, j;
5036
5037 if ((int) datarefs.length ()
5038 > param_loop_max_datarefs_for_datadeps)
5039 {
5040 struct data_dependence_relation *ddr;
5041
5042 /* Insert a single relation into dependence_relations:
5043 chrec_dont_know. */
5044 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5045 dependence_relations->safe_push (ddr);
5046 return false;
5047 }
5048
5049 FOR_EACH_VEC_ELT (datarefs, i, a)
5050 for (j = i + 1; datarefs.iterate (j, &b); j++)
5051 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5052 {
5053 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5054 dependence_relations->safe_push (ddr);
5055 if (loop_nest.exists ())
5056 compute_affine_dependence (ddr, loop_nest[0]);
5057 }
5058
5059 if (compute_self_and_rr)
5060 FOR_EACH_VEC_ELT (datarefs, i, a)
5061 {
5062 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5063 dependence_relations->safe_push (ddr);
5064 if (loop_nest.exists ())
5065 compute_affine_dependence (ddr, loop_nest[0]);
5066 }
5067
5068 return true;
5069 }
5070
/* Describes a location of a memory reference within a statement.  */

struct data_ref_loc
{
  /* The memory reference tree (a DECL or a REFERENCE_CLASS_P tree).  */
  tree ref;

  /* True if the memory reference is read, false if it is written.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
5086
5087
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only trust the lane call when it belongs to the loop
		 whose simduid it names; otherwise treat it as an
		 unknown memory clobber.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled explicitly below by reconstructing a MEM_REF.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual use operand the statement performs no memory
     access we would need to record.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a DECL or a reference
	 whose base is neither an SSA name nor an invariant.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Rebuild a MEM_REF from the pointer argument (arg 0) and the
	       alignment argument (arg 1); the accessed type comes from the
	       lhs for loads and from argument 3 for stores.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* A masked access might not happen even when the statement
	       executes.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Scan the call arguments for memory reads.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the lhs (of the assignment or call) as a write when it is a
     memory reference.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5218
5219
5220 /* Returns true if the loop-nest has any data reference. */
5221
5222 bool
5223 loop_nest_has_data_refs (loop_p loop)
5224 {
5225 basic_block *bbs = get_loop_body (loop);
5226 auto_vec<data_ref_loc, 3> references;
5227
5228 for (unsigned i = 0; i < loop->num_nodes; i++)
5229 {
5230 basic_block bb = bbs[i];
5231 gimple_stmt_iterator bsi;
5232
5233 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5234 {
5235 gimple *stmt = gsi_stmt (bsi);
5236 get_references_in_stmt (stmt, &references);
5237 if (references.length ())
5238 {
5239 free (bbs);
5240 return true;
5241 }
5242 }
5243 }
5244 free (bbs);
5245 return false;
5246 }
5247
5248 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5249 reference, returns false, otherwise returns true. NEST is the outermost
5250 loop of the loop nest in which the references should be analyzed. */
5251
5252 opt_result
5253 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5254 vec<data_reference_p> *datarefs)
5255 {
5256 unsigned i;
5257 auto_vec<data_ref_loc, 2> references;
5258 data_ref_loc *ref;
5259 data_reference_p dr;
5260
5261 if (get_references_in_stmt (stmt, &references))
5262 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5263 stmt);
5264
5265 FOR_EACH_VEC_ELT (references, i, ref)
5266 {
5267 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5268 loop_containing_stmt (stmt), ref->ref,
5269 stmt, ref->is_read, ref->is_conditional_in_stmt);
5270 gcc_assert (dr != NULL);
5271 datarefs->safe_push (dr);
5272 }
5273
5274 return opt_result::success ();
5275 }
5276
5277 /* Stores the data references in STMT to DATAREFS. If there is an
5278 unanalyzable reference, returns false, otherwise returns true.
5279 NEST is the outermost loop of the loop nest in which the references
5280 should be instantiated, LOOP is the loop in which the references
5281 should be analyzed. */
5282
5283 bool
5284 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5285 vec<data_reference_p> *datarefs)
5286 {
5287 unsigned i;
5288 auto_vec<data_ref_loc, 2> references;
5289 data_ref_loc *ref;
5290 bool ret = true;
5291 data_reference_p dr;
5292
5293 if (get_references_in_stmt (stmt, &references))
5294 return false;
5295
5296 FOR_EACH_VEC_ELT (references, i, ref)
5297 {
5298 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5299 ref->is_conditional_in_stmt);
5300 gcc_assert (dr != NULL);
5301 datarefs->safe_push (dr);
5302 }
5303
5304 return ret;
5305 }
5306
5307 /* Search the data references in LOOP, and record the information into
5308 DATAREFS. Returns chrec_dont_know when failing to analyze a
5309 difficult case, returns NULL_TREE otherwise. */
5310
5311 tree
5312 find_data_references_in_bb (class loop *loop, basic_block bb,
5313 vec<data_reference_p> *datarefs)
5314 {
5315 gimple_stmt_iterator bsi;
5316
5317 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5318 {
5319 gimple *stmt = gsi_stmt (bsi);
5320
5321 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5322 {
5323 struct data_reference *res;
5324 res = XCNEW (struct data_reference);
5325 datarefs->safe_push (res);
5326
5327 return chrec_dont_know;
5328 }
5329 }
5330
5331 return NULL_TREE;
5332 }
5333
5334 /* Search the data references in LOOP, and record the information into
5335 DATAREFS. Returns chrec_dont_know when failing to analyze a
5336 difficult case, returns NULL_TREE otherwise.
5337
5338 TODO: This function should be made smarter so that it can handle address
5339 arithmetic as if they were array accesses, etc. */
5340
5341 tree
5342 find_data_references_in_loop (class loop *loop,
5343 vec<data_reference_p> *datarefs)
5344 {
5345 basic_block bb, *bbs;
5346 unsigned int i;
5347
5348 bbs = get_loop_body_in_dom_order (loop);
5349
5350 for (i = 0; i < loop->num_nodes; i++)
5351 {
5352 bb = bbs[i];
5353
5354 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5355 {
5356 free (bbs);
5357 return chrec_dont_know;
5358 }
5359 }
5360 free (bbs);
5361
5362 return NULL_TREE;
5363 }
5364
5365 /* Return the alignment in bytes that DRB is guaranteed to have at all
5366 times. */
5367
5368 unsigned int
5369 dr_alignment (innermost_loop_behavior *drb)
5370 {
5371 /* Get the alignment of BASE_ADDRESS + INIT. */
5372 unsigned int alignment = drb->base_alignment;
5373 unsigned int misalignment = (drb->base_misalignment
5374 + TREE_INT_CST_LOW (drb->init));
5375 if (misalignment != 0)
5376 alignment = MIN (alignment, misalignment & -misalignment);
5377
5378 /* Cap it to the alignment of OFFSET. */
5379 if (!integer_zerop (drb->offset))
5380 alignment = MIN (alignment, drb->offset_alignment);
5381
5382 /* Cap it to the alignment of STEP. */
5383 if (!integer_zerop (drb->step))
5384 alignment = MIN (alignment, drb->step_alignment);
5385
5386 return alignment;
5387 }
5388
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced back.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Give up on bit positions that are not a whole number of bytes.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5446
5447 /* Return the object whose alignment would need to be changed in order
5448 to increase the alignment of ADDR. Store the maximum achievable
5449 alignment in *MAX_ALIGNMENT. */
5450
5451 tree
5452 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5453 {
5454 tree base = get_base_for_alignment_1 (addr, max_alignment);
5455 if (base)
5456 return base;
5457
5458 if (TREE_CODE (addr) == ADDR_EXPR)
5459 addr = TREE_OPERAND (addr, 0);
5460 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5461 return addr;
5462 }
5463
5464 /* Recursive helper function. */
5465
5466 static bool
5467 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5468 {
5469 /* Inner loops of the nest should not contain siblings. Example:
5470 when there are two consecutive loops,
5471
5472 | loop_0
5473 | loop_1
5474 | A[{0, +, 1}_1]
5475 | endloop_1
5476 | loop_2
5477 | A[{0, +, 1}_2]
5478 | endloop_2
5479 | endloop_0
5480
5481 the dependence relation cannot be captured by the distance
5482 abstraction. */
5483 if (loop->next)
5484 return false;
5485
5486 loop_nest->safe_push (loop);
5487 if (loop->inner)
5488 return find_loop_nest_1 (loop->inner, loop_nest);
5489 return true;
5490 }
5491
5492 /* Return false when the LOOP is not well nested. Otherwise return
5493 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5494 contain the loops from the outermost to the innermost, as they will
5495 appear in the classic distance vector. */
5496
5497 bool
5498 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5499 {
5500 loop_nest->safe_push (loop);
5501 if (loop->inner)
5502 return find_loop_nest_1 (loop->inner, loop_nest);
5503 return true;
5504 }
5505
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  /* Reset the global counters updated by the dependence testers so the
     statistics dumped below cover only this invocation.  */
  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  /* Dump the accumulated per-test statistics when requested.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
5584
5585 /* Free the memory used by a data dependence relation DDR. */
5586
5587 void
5588 free_dependence_relation (struct data_dependence_relation *ddr)
5589 {
5590 if (ddr == NULL)
5591 return;
5592
5593 if (DDR_SUBSCRIPTS (ddr).exists ())
5594 free_subscripts (DDR_SUBSCRIPTS (ddr));
5595 DDR_DIST_VECTS (ddr).release ();
5596 DDR_DIR_VECTS (ddr).release ();
5597
5598 free (ddr);
5599 }
5600
5601 /* Free the memory used by the data dependence relations from
5602 DEPENDENCE_RELATIONS. */
5603
5604 void
5605 free_dependence_relations (vec<ddr_p> dependence_relations)
5606 {
5607 unsigned int i;
5608 struct data_dependence_relation *ddr;
5609
5610 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5611 if (ddr)
5612 free_dependence_relation (ddr);
5613
5614 dependence_relations.release ();
5615 }
5616
5617 /* Free the memory used by the data references from DATAREFS. */
5618
5619 void
5620 free_data_refs (vec<data_reference_p> datarefs)
5621 {
5622 unsigned int i;
5623 struct data_reference *dr;
5624
5625 FOR_EACH_VEC_ELT (datarefs, i, dr)
5626 free_data_ref (dr);
5627 datarefs.release ();
5628 }
5629
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  Return NULL_TREE when
   DR has no step at all.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  /* No range information available; conservatively assume the
	     full range of the step's type.  */
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* The scaled form could not be analyzed; fall back to the original
     step as the indicator.  */
  return DR_STEP (dr);
}
5689
/* Return a value that is negative iff DR has a negative step.
   May return NULL_TREE when DR has no step (see dr_step_indicator).  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 0: any step known to be >= 0 is summarized as 0.  */
  return dr_step_indicator (dr, 0);
}
5697
/* Return a value that is zero iff DR has a zero step.
   May return NULL_TREE when DR has no step (see dr_step_indicator).  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 1: any step known to be >= 1 is summarized as 1.  */
  return dr_step_indicator (dr, 1);
}
5705
5706 /* Return true if DR is known to have a nonnegative (but possibly zero)
5707 step. */
5708
5709 bool
5710 dr_known_forward_stride_p (struct data_reference *dr)
5711 {
5712 tree indicator = dr_direction_indicator (dr);
5713 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5714 fold_convert (ssizetype, indicator),
5715 ssize_int (0));
5716 return neg_step_val && integer_zerop (neg_step_val);
5717 }