]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
Apply mechanical replacement (generated patch).
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98 #include "tree-eh.h"
99 #include "ssa.h"
100
/* Counters accumulated by the dependence analyzer for statistics dumps.
   A single file-local instance, DEPENDENCE_STATS, is updated as tests
   are performed.  */

static struct datadep_stats
{
  /* Overall outcomes of whole dependence-relation tests.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript test outcomes.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variables) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
127
/* Forward declaration; the definition appears later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 tree_fold_divides_p (const_tree a, const_tree b)
135 {
136 gcc_assert (TREE_CODE (a) == INTEGER_CST);
137 gcc_assert (TREE_CODE (b) == INTEGER_CST);
138 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
139 }
140
/* Return true iff A evenly divides B, i.e. B modulo A is zero.  */

static inline bool
int_divides_p (int a, int b)
{
  const int remainder = b % a;
  return remainder == 0;
}
148
149 /* Return true if reference REF contains a union access. */
150
151 static bool
152 ref_contains_union_access_p (tree ref)
153 {
154 while (handled_component_p (ref))
155 {
156 ref = TREE_OPERAND (ref, 0);
157 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
158 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
159 return true;
160 }
161 return false;
162 }
163
164 \f
165
166 /* Dump into FILE all the data references from DATAREFS. */
167
168 static void
169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
170 {
171 unsigned int i;
172 struct data_reference *dr;
173
174 FOR_EACH_VEC_ELT (datarefs, i, dr)
175 dump_data_reference (file, dr);
176 }
177
/* Unified dump of all the data references in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
185
186 DEBUG_FUNCTION void
187 debug (vec<data_reference_p> *ptr)
188 {
189 if (ptr)
190 debug (*ptr);
191 else
192 fprintf (stderr, "<nil>\n");
193 }
194
195
/* Dump to stderr all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
203
/* Print to stderr the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
211
212 /* Dump function for a DATA_REFERENCE structure. */
213
214 void
215 dump_data_reference (FILE *outf,
216 struct data_reference *dr)
217 {
218 unsigned int i;
219
220 fprintf (outf, "#(Data Ref: \n");
221 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222 fprintf (outf, "# stmt: ");
223 print_gimple_stmt (outf, DR_STMT (dr), 0);
224 fprintf (outf, "# ref: ");
225 print_generic_stmt (outf, DR_REF (dr));
226 fprintf (outf, "# base_object: ");
227 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228
229 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230 {
231 fprintf (outf, "# Access function %d: ", i);
232 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233 }
234 fprintf (outf, "#)\n");
235 }
236
/* Unified dump of the data_reference REF to stderr.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
244
245 DEBUG_FUNCTION void
246 debug (data_reference *ptr)
247 {
248 if (ptr)
249 debug (*ptr);
250 else
251 fprintf (stderr, "<nil>\n");
252 }
253
254
255 /* Dumps the affine function described by FN to the file OUTF. */
256
257 DEBUG_FUNCTION void
258 dump_affine_function (FILE *outf, affine_fn fn)
259 {
260 unsigned i;
261 tree coef;
262
263 print_generic_expr (outf, fn[0], TDF_SLIM);
264 for (i = 1; fn.iterate (i, &coef); i++)
265 {
266 fprintf (outf, " + ");
267 print_generic_expr (outf, coef, TDF_SLIM);
268 fprintf (outf, " * x_%u", i);
269 }
270 }
271
272 /* Dumps the conflict function CF to the file OUTF. */
273
274 DEBUG_FUNCTION void
275 dump_conflict_function (FILE *outf, conflict_function *cf)
276 {
277 unsigned i;
278
279 if (cf->n == NO_DEPENDENCE)
280 fprintf (outf, "no dependence");
281 else if (cf->n == NOT_KNOWN)
282 fprintf (outf, "not known");
283 else
284 {
285 for (i = 0; i < cf->n; i++)
286 {
287 if (i != 0)
288 fprintf (outf, " ");
289 fprintf (outf, "[");
290 dump_affine_function (outf, cf->fns[i]);
291 fprintf (outf, "]");
292 }
293 }
294 }
295
296 /* Dump function for a SUBSCRIPT structure. */
297
298 DEBUG_FUNCTION void
299 dump_subscript (FILE *outf, struct subscript *subscript)
300 {
301 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302
303 fprintf (outf, "\n (subscript \n");
304 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
305 dump_conflict_function (outf, cf);
306 if (CF_NONTRIVIAL_P (cf))
307 {
308 tree last_iteration = SUB_LAST_CONFLICT (subscript);
309 fprintf (outf, "\n last_conflict: ");
310 print_generic_expr (outf, last_iteration);
311 }
312
313 cf = SUB_CONFLICTS_IN_B (subscript);
314 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
315 dump_conflict_function (outf, cf);
316 if (CF_NONTRIVIAL_P (cf))
317 {
318 tree last_iteration = SUB_LAST_CONFLICT (subscript);
319 fprintf (outf, "\n last_conflict: ");
320 print_generic_expr (outf, last_iteration);
321 }
322
323 fprintf (outf, "\n (Subscript distance: ");
324 print_generic_expr (outf, SUB_DISTANCE (subscript));
325 fprintf (outf, " ))\n");
326 }
327
328 /* Print the classic direction vector DIRV to OUTF. */
329
330 DEBUG_FUNCTION void
331 print_direction_vector (FILE *outf,
332 lambda_vector dirv,
333 int length)
334 {
335 int eq;
336
337 for (eq = 0; eq < length; eq++)
338 {
339 enum data_dependence_direction dir = ((enum data_dependence_direction)
340 dirv[eq]);
341
342 switch (dir)
343 {
344 case dir_positive:
345 fprintf (outf, " +");
346 break;
347 case dir_negative:
348 fprintf (outf, " -");
349 break;
350 case dir_equal:
351 fprintf (outf, " =");
352 break;
353 case dir_positive_or_equal:
354 fprintf (outf, " +=");
355 break;
356 case dir_positive_or_negative:
357 fprintf (outf, " +-");
358 break;
359 case dir_negative_or_equal:
360 fprintf (outf, " -=");
361 break;
362 case dir_star:
363 fprintf (outf, " *");
364 break;
365 default:
366 fprintf (outf, "indep");
367 break;
368 }
369 }
370 fprintf (outf, "\n");
371 }
372
373 /* Print a vector of direction vectors. */
374
375 DEBUG_FUNCTION void
376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 int length)
378 {
379 unsigned j;
380 lambda_vector v;
381
382 FOR_EACH_VEC_ELT (dir_vects, j, v)
383 print_direction_vector (outf, v, length);
384 }
385
386 /* Print out a vector VEC of length N to OUTFILE. */
387
388 DEBUG_FUNCTION void
389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
390 {
391 int i;
392
393 for (i = 0; i < n; i++)
394 fprintf (outfile, "%3d ", (int)vector[i]);
395 fprintf (outfile, "\n");
396 }
397
398 /* Print a vector of distance vectors. */
399
400 DEBUG_FUNCTION void
401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
402 int length)
403 {
404 unsigned j;
405 lambda_vector v;
406
407 FOR_EACH_VEC_ELT (dist_vects, j, v)
408 print_lambda_vector (outf, v, length);
409 }
410
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Prints the
   two data references, then either "don't know" / "no dependence", or
   (for a known dependence) the per-subscript information, the loop
   nest, and the distance and direction vectors.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* A null DDR or chrec_dont_know means the analyzer gave up.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  /* chrec_known means proven independence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  /* NULL_TREE means there is a dependence; dump its details.  */
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
485
/* Debug version: dump the dependence relation DDR to stderr.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
493
494 /* Dump into FILE all the dependence relations from DDRS. */
495
496 DEBUG_FUNCTION void
497 dump_data_dependence_relations (FILE *file,
498 vec<ddr_p> ddrs)
499 {
500 unsigned int i;
501 struct data_dependence_relation *ddr;
502
503 FOR_EACH_VEC_ELT (ddrs, i, ddr)
504 dump_data_dependence_relation (file, ddr);
505 }
506
/* Unified dump of all the dependence relations in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
512
513 DEBUG_FUNCTION void
514 debug (vec<ddr_p> *ptr)
515 {
516 if (ptr)
517 debug (*ptr);
518 else
519 fprintf (stderr, "<nil>\n");
520 }
521
522
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
530
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; each printed vector has DDR_NB_LOOPS
   elements, i.e. one per loop in the considered nest.  */
535
536 DEBUG_FUNCTION void
537 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 {
539 unsigned int i, j;
540 struct data_dependence_relation *ddr;
541 lambda_vector v;
542
543 FOR_EACH_VEC_ELT (ddrs, i, ddr)
544 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 {
546 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 {
548 fprintf (file, "DISTANCE_V (");
549 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
550 fprintf (file, ")\n");
551 }
552
553 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 {
555 fprintf (file, "DIRECTION_V (");
556 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
557 fprintf (file, ")\n");
558 }
559 }
560
561 fprintf (file, "\n\n");
562 }
563
564 /* Dumps the data dependence relations DDRS in FILE. */
565
566 DEBUG_FUNCTION void
567 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 {
569 unsigned int i;
570 struct data_dependence_relation *ddr;
571
572 FOR_EACH_VEC_ELT (ddrs, i, ddr)
573 dump_data_dependence_relation (file, ddr);
574
575 fprintf (file, "\n\n");
576 }
577
/* Dump the data dependence relations DDRS to stderr.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
583
/* Forward declaration of the recursive worker; see the definition
   below for the contract.  */
static void
split_constant_offset (tree exp, tree *var, tree *off,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
588
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.

   CACHE memoizes results for multi-use SSA names to avoid un-CSEing
   large amounts of code; LIMIT bounds the depth of the SSA def-chain
   walk and is decremented as definitions are followed.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A bare constant splits as 0 + C.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      if (TREE_CODE (op1) == INTEGER_CST)
	{
	  /* VAR +- C: fold C into the offset of the split of VAR.  */
	  split_constant_offset (op0, &var0, &off0, cache, limit);
	  *var = var0;
	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
	  return true;
	}
      /* Split both operands and recombine variable parts and offsets
	 separately.  */
      split_constant_offset (op0, &var0, &off0, cache, limit);
      split_constant_offset (op1, &var1, &off1, cache, limit);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplications by a constant scale can be distributed
	 over the split.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, cache, limit);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on bit offsets that are not a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    /* Split the variable offset returned by get_inner_reference
	       and fold its constant part into OFF0.  */
	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A cached zero offset records a previous failure (or a
		   cycle guard); anything else is a previous success.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		return true;
	      }
	    /* Pre-seed with (op0, 0) so a recursive visit of OP0 fails
	       instead of looping.  */
	    e = std::make_pair (op0, ssize_int (0));
	  }

	/* Bound the length of the def-chain walk.  */
	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	return res;
      }
    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
	      {
		/* Split the unconverted operand and try to prove that
		   wrapping isn't a problem.  */
		tree tmp_var, tmp_off;
		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);

		/* See whether we have an SSA_NAME whose range is known
		   to be [A, B].  */
		if (TREE_CODE (tmp_var) != SSA_NAME)
		  return false;
		wide_int var_min, var_max;
		value_range_kind vr_type = get_range_info (tmp_var, &var_min,
							   &var_max);
		wide_int var_nonzero = get_nonzero_bits (tmp_var);
		signop sgn = TYPE_SIGN (itype);
		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
						       &var_max, var_nonzero,
						       sgn) != VR_RANGE)
		  return false;

		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
		   operations done in ITYPE.  The addition must overflow
		   at both ends of the range or at neither.  */
		wi::overflow_type overflow[2];
		unsigned int prec = TYPE_PRECISION (itype);
		wide_int woff = wi::to_wide (tmp_off, prec);
		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
		wi::add (var_max, woff, sgn, &overflow[1]);
		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
		  return false;

		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
		widest_int diff = (widest_int::from (op0_min, sgn)
				   - widest_int::from (var_min, sgn));
		var0 = tmp_var;
		*off = wide_int_to_tree (ssizetype, diff);
	      }
	    else
	      split_constant_offset (op0, &var0, off, cache, limit);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
808
809 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
810 will be ssizetype. */
811
812 static void
813 split_constant_offset (tree exp, tree *var, tree *off,
814 hash_map<tree, std::pair<tree, tree> > &cache,
815 unsigned *limit)
816 {
817 tree type = TREE_TYPE (exp), op0, op1, e, o;
818 enum tree_code code;
819
820 *var = exp;
821 *off = ssize_int (0);
822
823 if (tree_is_chrec (exp)
824 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
825 return;
826
827 code = TREE_CODE (exp);
828 extract_ops_from_tree (exp, &code, &op0, &op1);
829 if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
830 {
831 *var = e;
832 *off = o;
833 }
834 }
835
/* Public entry point: express EXP as VAR + OFF, with OFF a constant of
   type ssizetype.  The SSA def-chain walk is bounded by the
   ssa-name-def-chain-limit parameter.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  /* Lazily allocated and reused across calls; emptied (but not freed)
     after each top-level query.  */
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, *cache, &limit);
  cache->empty ();
}
846
847 /* Returns the address ADDR of an object in a canonical shape (without nop
848 casts, and with type of pointer to the object). */
849
850 static tree
851 canonicalize_base_object_address (tree addr)
852 {
853 tree orig = addr;
854
855 STRIP_NOPS (addr);
856
857 /* The base address may be obtained by casting from integer, in that case
858 keep the cast. */
859 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
860 return orig;
861
862 if (TREE_CODE (addr) != ADDR_EXPR)
863 return addr;
864
865 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
866 }
867
868 /* Analyze the behavior of memory reference REF within STMT.
869 There are two modes:
870
871 - BB analysis. In this case we simply split the address into base,
872 init and offset components, without reference to any containing loop.
873 The resulting base and offset are general expressions and they can
874 vary arbitrarily from one iteration of the containing loop to the next.
875 The step is always zero.
876
877 - loop analysis. In this case we analyze the reference both wrt LOOP
878 and on the basis that the reference occurs (is "used") in LOOP;
879 see the comment above analyze_scalar_evolution_in_loop for more
880 information about this distinction. The base, init, offset and
881 step fields are all invariant in LOOP.
882
883 Perform BB analysis if LOOP is null, or if LOOP is the function's
884 dummy outermost loop. In other cases perform loop analysis.
885
886 Return true if the analysis succeeded and store the results in DRB if so.
887 BB analysis can only fail for bitfield or reversed-storage accesses. */
888
889 opt_result
890 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
891 class loop *loop, const gimple *stmt)
892 {
893 poly_int64 pbitsize, pbitpos;
894 tree base, poffset;
895 machine_mode pmode;
896 int punsignedp, preversep, pvolatilep;
897 affine_iv base_iv, offset_iv;
898 tree init, dinit, step;
899 bool in_loop = (loop && loop->num);
900
901 if (dump_file && (dump_flags & TDF_DETAILS))
902 fprintf (dump_file, "analyze_innermost: ");
903
904 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
905 &punsignedp, &preversep, &pvolatilep);
906 gcc_assert (base != NULL_TREE);
907
908 poly_int64 pbytepos;
909 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
910 return opt_result::failure_at (stmt,
911 "failed: bit offset alignment.\n");
912
913 if (preversep)
914 return opt_result::failure_at (stmt,
915 "failed: reverse storage order.\n");
916
917 /* Calculate the alignment and misalignment for the inner reference. */
918 unsigned int HOST_WIDE_INT bit_base_misalignment;
919 unsigned int bit_base_alignment;
920 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
921
922 /* There are no bitfield references remaining in BASE, so the values
923 we got back must be whole bytes. */
924 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
925 && bit_base_misalignment % BITS_PER_UNIT == 0);
926 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
927 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
928
929 if (TREE_CODE (base) == MEM_REF)
930 {
931 if (!integer_zerop (TREE_OPERAND (base, 1)))
932 {
933 /* Subtract MOFF from the base and add it to POFFSET instead.
934 Adjust the misalignment to reflect the amount we subtracted. */
935 poly_offset_int moff = mem_ref_offset (base);
936 base_misalignment -= moff.force_shwi ();
937 tree mofft = wide_int_to_tree (sizetype, moff);
938 if (!poffset)
939 poffset = mofft;
940 else
941 poffset = size_binop (PLUS_EXPR, poffset, mofft);
942 }
943 base = TREE_OPERAND (base, 0);
944 }
945 else
946 base = build_fold_addr_expr (base);
947
948 if (in_loop)
949 {
950 if (!simple_iv (loop, loop, base, &base_iv, true))
951 return opt_result::failure_at
952 (stmt, "failed: evolution of base is not affine.\n");
953 }
954 else
955 {
956 base_iv.base = base;
957 base_iv.step = ssize_int (0);
958 base_iv.no_overflow = true;
959 }
960
961 if (!poffset)
962 {
963 offset_iv.base = ssize_int (0);
964 offset_iv.step = ssize_int (0);
965 }
966 else
967 {
968 if (!in_loop)
969 {
970 offset_iv.base = poffset;
971 offset_iv.step = ssize_int (0);
972 }
973 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
974 return opt_result::failure_at
975 (stmt, "failed: evolution of offset is not affine.\n");
976 }
977
978 init = ssize_int (pbytepos);
979
980 /* Subtract any constant component from the base and add it to INIT instead.
981 Adjust the misalignment to reflect the amount we subtracted. */
982 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
983 init = size_binop (PLUS_EXPR, init, dinit);
984 base_misalignment -= TREE_INT_CST_LOW (dinit);
985
986 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
987 init = size_binop (PLUS_EXPR, init, dinit);
988
989 step = size_binop (PLUS_EXPR,
990 fold_convert (ssizetype, base_iv.step),
991 fold_convert (ssizetype, offset_iv.step));
992
993 base = canonicalize_base_object_address (base_iv.base);
994
995 /* See if get_pointer_alignment can guarantee a higher alignment than
996 the one we calculated above. */
997 unsigned int HOST_WIDE_INT alt_misalignment;
998 unsigned int alt_alignment;
999 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000
1001 /* As above, these values must be whole bytes. */
1002 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1003 && alt_misalignment % BITS_PER_UNIT == 0);
1004 alt_alignment /= BITS_PER_UNIT;
1005 alt_misalignment /= BITS_PER_UNIT;
1006
1007 if (base_alignment < alt_alignment)
1008 {
1009 base_alignment = alt_alignment;
1010 base_misalignment = alt_misalignment;
1011 }
1012
1013 drb->base_address = base;
1014 drb->offset = fold_convert (ssizetype, offset_iv.base);
1015 drb->init = init;
1016 drb->step = step;
1017 if (known_misalignment (base_misalignment, base_alignment,
1018 &drb->base_misalignment))
1019 drb->base_alignment = base_alignment;
1020 else
1021 {
1022 drb->base_alignment = known_alignment (base_misalignment);
1023 drb->base_misalignment = 0;
1024 }
1025 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1026 drb->step_alignment = highest_pow2_factor (step);
1027
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1029 fprintf (dump_file, "success.\n");
1030
1031 return opt_result::success ();
1032 }
1033
1034 /* Return true if OP is a valid component reference for a DR access
1035 function. This accepts a subset of what handled_component_p accepts. */
1036
1037 static bool
1038 access_fn_component_p (tree op)
1039 {
1040 switch (TREE_CODE (op))
1041 {
1042 case REALPART_EXPR:
1043 case IMAGPART_EXPR:
1044 case ARRAY_REF:
1045 return true;
1046
1047 case COMPONENT_REF:
1048 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049
1050 default:
1051 return false;
1052 }
1053 }
1054
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.

   On return DR_BASE_OBJECT and DR_ACCESS_FNS of DR are filled in.  Access
   functions are pushed outermost-dimension-last (the loop below walks the
   reference from the outside in).  */

static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The array index becomes an access function after analysis
	     and instantiation in the nest.  */
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  /* NOTE: this OFF shadows the function-scope OFF; the constant
	     pushed here is the field's bit offset within the record.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  /* Split OFF into an aligned part (kept in the initial
	     condition) and the remainder (kept in the MEM_REF offset).  */
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence clique/base info of the original
	     MEM_REF on the rebuilt one.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
1198
1199 /* Extracts the alias analysis information from the memory reference DR. */
1200
1201 static void
1202 dr_analyze_alias (struct data_reference *dr)
1203 {
1204 tree ref = DR_REF (dr);
1205 tree base = get_base_address (ref), addr;
1206
1207 if (INDIRECT_REF_P (base)
1208 || TREE_CODE (base) == MEM_REF)
1209 {
1210 addr = TREE_OPERAND (base, 0);
1211 if (TREE_CODE (addr) == SSA_NAME)
1212 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1213 }
1214 }
1215
/* Frees data reference DR: releases the access-function vector and then
   the structure itself (pairs with the XCNEW in create_data_ref).  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}
1224
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  The caller owns the
   returned structure and releases it with free_data_ref.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so fields not set below start out NULL/0.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* Fill in the base/offset/init/step decomposition, the access
     functions and the alias info in turn.  Pass a NULL loop when not
     analyzing within a nest so dr_analyze_innermost stays basic-block
     local.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1288
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  The order is
   arbitrary but total and deterministic within a compilation, which is
   all qsort-style callers need.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* Identity (including both NULL) compares equal; a lone NULL sorts
     first.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Different codes order by code, except that all conversions are
     treated as one equivalence class.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Shorter strings first, then lexicographic by contents.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  Operands
	 are walked from last to first; any nonzero result decides.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  /* Falling out of the switch means the trees compare equal.  */
  return 0;
}
1364
1365 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1366 check. */
1367
1368 opt_result
1369 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 {
1371 if (dump_enabled_p ())
1372 dump_printf (MSG_NOTE,
1373 "consider run-time aliasing test between %T and %T\n",
1374 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375
1376 if (!speed_p)
1377 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1378 "runtime alias check not supported when"
1379 " optimizing for size.\n");
1380
1381 /* FORNOW: We don't support versioning with outer-loop in either
1382 vectorization or loop distribution. */
1383 if (loop != NULL && loop->inner != NULL)
1384 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1385 "runtime alias check not supported for"
1386 " outer loop.\n");
1387
1388 return opt_result::success ();
1389 }
1390
1391 /* Operator == between two dr_with_seg_len objects.
1392
1393 This equality operator is used to make sure two data refs
1394 are the same one so that we will consider to combine the
1395 aliasing checks of those two pairs of data dependent data
1396 refs. */
1397
1398 static bool
1399 operator == (const dr_with_seg_len& d1,
1400 const dr_with_seg_len& d2)
1401 {
1402 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1403 DR_BASE_ADDRESS (d2.dr), 0)
1404 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1405 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1406 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1407 && known_eq (d1.access_size, d2.access_size)
1408 && d1.align == d2.align);
1409 }
1410
1411 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1412 so that we can combine aliasing checks in one scan. */
1413
1414 static int
1415 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 {
1417 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1418 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1419 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1420 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421
1422 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1423 if a and c have the same basic address snd step, and b and d have the same
1424 address and step. Therefore, if any a&c or b&d don't have the same address
1425 and step, we don't care the order of those two pairs after sorting. */
1426 int comp_res;
1427
1428 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1429 DR_BASE_ADDRESS (b1.dr))) != 0)
1430 return comp_res;
1431 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1432 DR_BASE_ADDRESS (b2.dr))) != 0)
1433 return comp_res;
1434 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1435 DR_STEP (b1.dr))) != 0)
1436 return comp_res;
1437 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1438 DR_STEP (b2.dr))) != 0)
1439 return comp_res;
1440 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1441 DR_OFFSET (b1.dr))) != 0)
1442 return comp_res;
1443 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1444 DR_INIT (b1.dr))) != 0)
1445 return comp_res;
1446 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1447 DR_OFFSET (b2.dr))) != 0)
1448 return comp_res;
1449 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1450 DR_INIT (b2.dr))) != 0)
1451 return comp_res;
1452
1453 return 0;
1454 }
1455
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the alising checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  for (size_t i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
		      *dr_b1 = &(*alias_pairs)[i-1].second,
		      *dr_a2 = &(*alias_pairs)[i].first,
		      *dr_b2 = &(*alias_pairs)[i].second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  /* ordered_remove shifts the tail down, so step back to revisit
	     the new element at index I.  */
	  alias_pairs->ordered_remove (i--);
	  continue;
	}

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      /* Swap the pointers so that below DR_A1/DR_A2 always name
		 the differing side of the pairs.  */
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      /* Note this swaps the stored objects, not just the local
		 pointers, so the vector entries themselves change.  */
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      poly_uint64 new_seg_len;
	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;

	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  /* DR_A2's range is now covered by DR_A1's; drop the second pair
	     and revisit the element shifted into its slot.  */
	  alias_pairs->ordered_remove (i);
	  i--;
	}
    }
}
1615
1616 /* Given LOOP's two data references and segment lengths described by DR_A
1617 and DR_B, create expression checking if the two addresses ranges intersect
1618 with each other based on index of the two addresses. This can only be
1619 done if DR_A and DR_B referring to the same (array) object and the index
1620 is the only difference. For example:
1621
1622 DR_A DR_B
1623 data-ref arr[i] arr[j]
1624 base_object arr arr
1625 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1626
1627 The addresses and their index are like:
1628
1629 |<- ADDR_A ->| |<- ADDR_B ->|
1630 ------------------------------------------------------->
1631 | | | | | | | | | |
1632 ------------------------------------------------------->
1633 i_0 ... i_0+4 j_0 ... j_0+4
1634
1635 We can create expression based on index rather than address:
1636
1637 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1638
1639 Note evolution step of index needs to be considered in comparison. */
1640
1641 static bool
1642 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1643 const dr_with_seg_len& dr_a,
1644 const dr_with_seg_len& dr_b)
1645 {
1646 if (integer_zerop (DR_STEP (dr_a.dr))
1647 || integer_zerop (DR_STEP (dr_b.dr))
1648 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1649 return false;
1650
1651 poly_uint64 seg_len1, seg_len2;
1652 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1653 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1654 return false;
1655
1656 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1657 return false;
1658
1659 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1660 return false;
1661
1662 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1663 return false;
1664
1665 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1666
1667 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1668 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1669 if (neg_step)
1670 {
1671 abs_step = -abs_step;
1672 seg_len1 = -seg_len1;
1673 seg_len2 = -seg_len2;
1674 }
1675 else
1676 {
1677 /* Include the access size in the length, so that we only have one
1678 tree addition below. */
1679 seg_len1 += dr_a.access_size;
1680 seg_len2 += dr_b.access_size;
1681 }
1682
1683 /* Infer the number of iterations with which the memory segment is accessed
1684 by DR. In other words, alias is checked if memory segment accessed by
1685 DR_A in some iterations intersect with memory segment accessed by DR_B
1686 in the same amount iterations.
1687 Note segnment length is a linear function of number of iterations with
1688 DR_STEP as the coefficient. */
1689 poly_uint64 niter_len1, niter_len2;
1690 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1691 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1692 return false;
1693
1694 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1695 if (neg_step)
1696 {
1697 /* Divide each access size by the byte step, rounding up. */
1698 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1699 abs_step, &niter_access1)
1700 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1701 abs_step, &niter_access2))
1702 return false;
1703 }
1704
1705 unsigned int i;
1706 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1707 {
1708 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1709 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1710 /* Two indices must be the same if they are not scev, or not scev wrto
1711 current loop being vecorized. */
1712 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1713 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1714 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1715 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1716 {
1717 if (operand_equal_p (access1, access2, 0))
1718 continue;
1719
1720 return false;
1721 }
1722 /* The two indices must have the same step. */
1723 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1724 return false;
1725
1726 tree idx_step = CHREC_RIGHT (access1);
1727 /* Index must have const step, otherwise DR_STEP won't be constant. */
1728 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1729 /* Index must evaluate in the same direction as DR. */
1730 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1731
1732 tree min1 = CHREC_LEFT (access1);
1733 tree min2 = CHREC_LEFT (access2);
1734 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1735 return false;
1736
1737 /* Ideally, alias can be checked against loop's control IV, but we
1738 need to prove linear mapping between control IV and reference
1739 index. Although that should be true, we check against (array)
1740 index of data reference. Like segment length, index length is
1741 linear function of the number of iterations with index_step as
1742 the coefficient, i.e, niter_len * idx_step. */
1743 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1744 build_int_cst (TREE_TYPE (min1),
1745 niter_len1));
1746 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1747 build_int_cst (TREE_TYPE (min2),
1748 niter_len2));
1749 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1750 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1751 /* Adjust ranges for negative step. */
1752 if (neg_step)
1753 {
1754 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1755 std::swap (min1, max1);
1756 std::swap (min2, max2);
1757
1758 /* As with the lengths just calculated, we've measured the access
1759 sizes in iterations, so multiply them by the index step. */
1760 tree idx_access1
1761 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1762 build_int_cst (TREE_TYPE (min1), niter_access1));
1763 tree idx_access2
1764 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1765 build_int_cst (TREE_TYPE (min2), niter_access2));
1766
1767 /* MINUS_EXPR because the above values are negative. */
1768 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1769 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1770 }
1771 tree part_cond_expr
1772 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1773 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1774 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1775 if (*cond_expr)
1776 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1777 *cond_expr, part_cond_expr);
1778 else
1779 *cond_expr = part_cond_expr;
1780 }
1781 return true;
1782 }
1783
/* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ,.... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
			base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  /* NEG_STEP is a runtime tree predicate: true when D walks downwards.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  /* ADDR_BASE = base + offset + init, the address of the first access.  */
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  /* Select LB/UB reach depending on the runtime step direction; this
     mirrors the "step < 0 ? ... : ..." selections in the comment above.  */
  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
1867
1868 /* Given two data references and segment lengths described by DR_A and DR_B,
1869 create expression checking if the two addresses ranges intersect with
1870 each other:
1871
1872 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1873 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1874
1875 static void
1876 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1877 const dr_with_seg_len& dr_a,
1878 const dr_with_seg_len& dr_b)
1879 {
1880 *cond_expr = NULL_TREE;
1881 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1882 return;
1883
1884 unsigned HOST_WIDE_INT min_align;
1885 tree_code cmp_code;
1886 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1887 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1888 {
1889 /* In this case adding access_size to seg_len is likely to give
1890 a simple X * step, where X is either the number of scalar
1891 iterations or the vectorization factor. We're better off
1892 keeping that, rather than subtracting an alignment from it.
1893
1894 In this case the maximum values are exclusive and so there is
1895 no alias if the maximum of one segment equals the minimum
1896 of another. */
1897 min_align = 0;
1898 cmp_code = LE_EXPR;
1899 }
1900 else
1901 {
1902 /* Calculate the minimum alignment shared by all four pointers,
1903 then arrange for this alignment to be subtracted from the
1904 exclusive maximum values to get inclusive maximum values.
1905 This "- min_align" is cumulative with a "+ access_size"
1906 in the calculation of the maximum values. In the best
1907 (and common) case, the two cancel each other out, leaving
1908 us with an inclusive bound based only on seg_len. In the
1909 worst case we're simply adding a smaller number than before.
1910
1911 Because the maximum values are inclusive, there is an alias
1912 if the maximum value of one segment is equal to the minimum
1913 value of the other. */
1914 min_align = MIN (dr_a.align, dr_b.align);
1915 cmp_code = LT_EXPR;
1916 }
1917
1918 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1919 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1920 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1921
1922 *cond_expr
1923 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1924 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1925 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1926 }
1927
1928 /* Create a conditional expression that represents the run-time checks for
1929 overlapping of address ranges represented by a list of data references
1930 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1931 COND_EXPR is the conditional expression to be used in the if statement
1932 that controls which version of the loop gets executed at runtime. */
1933
1934 void
1935 create_runtime_alias_checks (class loop *loop,
1936 vec<dr_with_seg_len_pair_t> *alias_pairs,
1937 tree * cond_expr)
1938 {
1939 tree part_cond_expr;
1940
1941 fold_defer_overflow_warnings ();
1942 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1943 {
1944 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1945 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1946
1947 if (dump_enabled_p ())
1948 dump_printf (MSG_NOTE,
1949 "create runtime check for data references %T and %T\n",
1950 DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1951
1952 /* Create condition expression for each pair data references. */
1953 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1954 if (*cond_expr)
1955 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1956 *cond_expr, part_cond_expr);
1957 else
1958 *cond_expr = part_cond_expr;
1959 }
1960 fold_undefer_and_ignore_overflow_warnings ();
1961 }
1962
1963 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1964 expressions. */
1965 static bool
1966 dr_equal_offsets_p1 (tree offset1, tree offset2)
1967 {
1968 bool res;
1969
1970 STRIP_NOPS (offset1);
1971 STRIP_NOPS (offset2);
1972
1973 if (offset1 == offset2)
1974 return true;
1975
1976 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1977 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1978 return false;
1979
1980 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1981 TREE_OPERAND (offset2, 0));
1982
1983 if (!res || !BINARY_CLASS_P (offset1))
1984 return res;
1985
1986 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1987 TREE_OPERAND (offset2, 1));
1988
1989 return res;
1990 }
1991
1992 /* Check if DRA and DRB have equal offsets. */
1993 bool
1994 dr_equal_offsets_p (struct data_reference *dra,
1995 struct data_reference *drb)
1996 {
1997 tree offset1, offset2;
1998
1999 offset1 = DR_OFFSET (dra);
2000 offset2 = DR_OFFSET (drb);
2001
2002 return dr_equal_offsets_p1 (offset1, offset2);
2003 }
2004
2005 /* Returns true if FNA == FNB. */
2006
2007 static bool
2008 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2009 {
2010 unsigned i, n = fna.length ();
2011
2012 if (n != fnb.length ())
2013 return false;
2014
2015 for (i = 0; i < n; i++)
2016 if (!operand_equal_p (fna[i], fnb[i], 0))
2017 return false;
2018
2019 return true;
2020 }
2021
2022 /* If all the functions in CF are the same, returns one of them,
2023 otherwise returns NULL. */
2024
2025 static affine_fn
2026 common_affine_function (conflict_function *cf)
2027 {
2028 unsigned i;
2029 affine_fn comm;
2030
2031 if (!CF_NONTRIVIAL_P (cf))
2032 return affine_fn ();
2033
2034 comm = cf->fns[0];
2035
2036 for (i = 1; i < cf->n; i++)
2037 if (!affine_function_equal_p (comm, cf->fns[i]))
2038 return affine_fn ();
2039
2040 return comm;
2041 }
2042
/* Returns the base of the affine function FN, i.e. its constant term
   (element 0 of the coefficient vector).  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2050
2051 /* Returns true if FN is a constant. */
2052
2053 static bool
2054 affine_function_constant_p (affine_fn fn)
2055 {
2056 unsigned i;
2057 tree coef;
2058
2059 for (i = 1; fn.iterate (i, &coef); i++)
2060 if (!integer_zerop (coef))
2061 return false;
2062
2063 return true;
2064 }
2065
2066 /* Returns true if FN is the zero constant function. */
2067
2068 static bool
2069 affine_function_zero_p (affine_fn fn)
2070 {
2071 return (integer_zerop (affine_function_base (fn))
2072 && affine_function_constant_p (fn));
2073 }
2074
2075 /* Returns a signed integer type with the largest precision from TA
2076 and TB. */
2077
2078 static tree
2079 signed_type_for_types (tree ta, tree tb)
2080 {
2081 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2082 return signed_type_for (ta);
2083 else
2084 return signed_type_for (tb);
2085 }
2086
2087 /* Applies operation OP on affine functions FNA and FNB, and returns the
2088 result. */
2089
2090 static affine_fn
2091 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2092 {
2093 unsigned i, n, m;
2094 affine_fn ret;
2095 tree coef;
2096
2097 if (fnb.length () > fna.length ())
2098 {
2099 n = fna.length ();
2100 m = fnb.length ();
2101 }
2102 else
2103 {
2104 n = fnb.length ();
2105 m = fna.length ();
2106 }
2107
2108 ret.create (m);
2109 for (i = 0; i < n; i++)
2110 {
2111 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2112 TREE_TYPE (fnb[i]));
2113 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2114 }
2115
2116 for (; fna.iterate (i, &coef); i++)
2117 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2118 coef, integer_zero_node));
2119 for (; fnb.iterate (i, &coef); i++)
2120 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2121 integer_zero_node, coef));
2122
2123 return ret;
2124 }
2125
/* Returns the sum of affine functions FNA and FNB.  The result is a
   fresh vector; the caller must release it with affine_fn_free.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
2133
/* Returns the difference of affine functions FNA and FNB (FNA - FNB).
   The result is a fresh vector; the caller must release it with
   affine_fn_free.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2141
/* Frees affine function FN by releasing its underlying coefficient
   vector.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2149
/* Determine for each subscript in the data dependence relation DDR
   the distance, i.e. the constant difference between the conflicting
   iterations of the two references.  Only meaningful when DDR has not
   already been resolved (DDR_ARE_DEPENDENT is NULL_TREE); otherwise
   this is a no-op.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      /* No single affine function describes the conflicts:
		 record "don't know" and give up on the remaining
		 subscripts as well (note: return, not continue).  */
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  /* The distance is the (constant) difference of the two
	     conflict functions.  FN_A and FN_B alias entries of the
	     conflict functions and must not be freed here; DIFF is a
	     fresh vector and is.  */
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
2189
2190 /* Returns the conflict function for "unknown". */
2191
2192 static conflict_function *
2193 conflict_fn_not_known (void)
2194 {
2195 conflict_function *fn = XCNEW (conflict_function);
2196 fn->n = NOT_KNOWN;
2197
2198 return fn;
2199 }
2200
2201 /* Returns the conflict function for "independent". */
2202
2203 static conflict_function *
2204 conflict_fn_no_dependence (void)
2205 {
2206 conflict_function *fn = XCNEW (conflict_function);
2207 fn->n = NO_DEPENDENCE;
2208
2209 return fn;
2210 }
2211
2212 /* Returns true if the address of OBJ is invariant in LOOP. */
2213
2214 static bool
2215 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2216 {
2217 while (handled_component_p (obj))
2218 {
2219 if (TREE_CODE (obj) == ARRAY_REF)
2220 {
2221 for (int i = 1; i < 4; ++i)
2222 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2223 loop->num))
2224 return false;
2225 }
2226 else if (TREE_CODE (obj) == COMPONENT_REF)
2227 {
2228 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2229 loop->num))
2230 return false;
2231 }
2232 obj = TREE_OPERAND (obj, 0);
2233 }
2234
2235 if (!INDIRECT_REF_P (obj)
2236 && TREE_CODE (obj) != MEM_REF)
2237 return true;
2238
2239 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2240 loop->num);
2241 }
2242
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test whether the ranges
	 [off2 - off1, size2) and [0, size1) can overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified clique/base disambiguation: two MEM_REFs in the
     same clique with different bases do not alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch on the
     read/write kinds so the oracle can use the appropriate (output,
     anti or flow/input) disambiguation.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2329
2330 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2331 if it is meaningful to compare their associated access functions
2332 when checking for dependencies. */
2333
2334 static bool
2335 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2336 {
2337 /* Allow pairs of component refs from the following sets:
2338
2339 { REALPART_EXPR, IMAGPART_EXPR }
2340 { COMPONENT_REF }
2341 { ARRAY_REF }. */
2342 tree_code code_a = TREE_CODE (ref_a);
2343 tree_code code_b = TREE_CODE (ref_b);
2344 if (code_a == IMAGPART_EXPR)
2345 code_a = REALPART_EXPR;
2346 if (code_b == IMAGPART_EXPR)
2347 code_b = REALPART_EXPR;
2348 if (code_a != code_b)
2349 return false;
2350
2351 if (TREE_CODE (ref_a) == COMPONENT_REF)
2352 /* ??? We cannot simply use the type of operand #0 of the refs here as
2353 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2354 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2355 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2356 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2357
2358 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2359 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2360 }
2361
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  Returns a freshly
   allocated relation; when dependence can be decided up front the
   result has DDR_ARE_DEPENDENT set to chrec_known (independent) or
   chrec_dont_know (assume dependence), otherwise subscripts are set
   up for the later dependence tests.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* Without both references we must conservatively assume dependence.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
	A: .d  .c  .s [i]

	 0   1   2   3   4   5
	B: __real .f [i] .e  .s [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Step past the larger enclosing object (or both on a tie), so
	 the two walks stay roughly aligned in terms of object size.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per matched access-function pair; conflict
     information starts out as "not known" and is refined by the
     dependence tests.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2683
2684 /* Frees memory used by the conflict function F. */
2685
2686 static void
2687 free_conflict_function (conflict_function *f)
2688 {
2689 unsigned i;
2690
2691 if (CF_NONTRIVIAL_P (f))
2692 {
2693 for (i = 0; i < f->n; i++)
2694 affine_fn_free (f->fns[i]);
2695 }
2696 free (f);
2697 }
2698
2699 /* Frees memory used by SUBSCRIPTS. */
2700
2701 static void
2702 free_subscripts (vec<subscript_p> subscripts)
2703 {
2704 unsigned i;
2705 subscript_p s;
2706
2707 FOR_EACH_VEC_ELT (subscripts, i, s)
2708 {
2709 free_conflict_function (s->conflicting_iterations_in_a);
2710 free_conflict_function (s->conflicting_iterations_in_b);
2711 free (s);
2712 }
2713 subscripts.release ();
2714 }
2715
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  Once the final answer CHREC is recorded, the
   per-subscript conflict information is no longer needed and is
   freed.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  /* Leave an empty but valid vector behind so later users of
     DDR_SUBSCRIPTS do not touch released storage.  */
  DDR_SUBSCRIPTS (ddr).create (0);
}
2727
2728 /* The dependence relation DDR cannot be represented by a distance
2729 vector. */
2730
2731 static inline void
2732 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2733 {
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2736
2737 DDR_AFFINE_P (ddr) = false;
2738 }
2739
2740 \f
2741
2742 /* This section contains the classic Banerjee tests. */
2743
2744 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2745 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2746
2747 static inline bool
2748 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2749 {
2750 return (evolution_function_is_constant_p (chrec_a)
2751 && evolution_function_is_constant_p (chrec_b));
2752 }
2753
2754 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2755 variable, i.e., if the SIV (Single Index Variable) test is true. */
2756
2757 static bool
2758 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2759 {
2760 if ((evolution_function_is_constant_p (chrec_a)
2761 && evolution_function_is_univariate_p (chrec_b))
2762 || (evolution_function_is_constant_p (chrec_b)
2763 && evolution_function_is_univariate_p (chrec_a)))
2764 return true;
2765
2766 if (evolution_function_is_univariate_p (chrec_a)
2767 && evolution_function_is_univariate_p (chrec_b))
2768 {
2769 switch (TREE_CODE (chrec_a))
2770 {
2771 case POLYNOMIAL_CHREC:
2772 switch (TREE_CODE (chrec_b))
2773 {
2774 case POLYNOMIAL_CHREC:
2775 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2776 return false;
2777 /* FALLTHRU */
2778
2779 default:
2780 return true;
2781 }
2782
2783 default:
2784 return true;
2785 }
2786 }
2787
2788 return false;
2789 }
2790
/* Creates a conflict function with N dimensions.  The N affine
   functions, one per dimension, follow as variadic arguments.  The
   result is heap-allocated; release it with free_conflict_function.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  /* MAX_DIM bounds the fixed-size fns[] array inside
     conflict_function.  */
  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}
2811
2812 /* Returns constant affine function with value CST. */
2813
2814 static affine_fn
2815 affine_fn_cst (tree cst)
2816 {
2817 affine_fn fn;
2818 fn.create (1);
2819 fn.quick_push (cst);
2820 return fn;
2821 }
2822
2823 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2824
2825 static affine_fn
2826 affine_fn_univar (tree cst, unsigned dim, tree coef)
2827 {
2828 affine_fn fn;
2829 fn.create (dim + 1);
2830 unsigned i;
2831
2832 gcc_assert (dim > 0);
2833 fn.quick_push (cst);
2834 for (i = 1; i < dim; i++)
2835 fn.quick_push (integer_zero_node);
2836 fn.quick_push (coef);
2837 return fn;
2838 }
2839
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   *LAST_CONFLICTS is set to the number of conflicting iterations
   (chrec_dont_know for "all", integer_zero_node for "none").  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  /* Fold both chrecs into a common signed type so the difference is
     well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
2903
2904 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2905 and only if it fits to the int type. If this is not the case, or the
2906 bound on the number of iterations of LOOP could not be derived, returns
2907 chrec_dont_know. */
2908
2909 static tree
2910 max_stmt_executions_tree (class loop *loop)
2911 {
2912 widest_int nit;
2913
2914 if (!max_stmt_executions (loop, &nit))
2915 return chrec_dont_know;
2916
2917 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2918 return chrec_dont_know;
2919
2920 return wide_int_to_tree (unsigned_type_node, nit);
2921 }
2922
2923 /* Determine whether the CHREC is always positive/negative. If the expression
2924 cannot be statically analyzed, return false, otherwise set the answer into
2925 VALUE. */
2926
2927 static bool
2928 chrec_is_positive (tree chrec, bool *value)
2929 {
2930 bool value0, value1, value2;
2931 tree end_value, nb_iter;
2932
2933 switch (TREE_CODE (chrec))
2934 {
2935 case POLYNOMIAL_CHREC:
2936 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2937 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2938 return false;
2939
2940 /* FIXME -- overflows. */
2941 if (value0 == value1)
2942 {
2943 *value = value0;
2944 return true;
2945 }
2946
2947 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2948 and the proof consists in showing that the sign never
2949 changes during the execution of the loop, from 0 to
2950 loop->nb_iterations. */
2951 if (!evolution_function_is_affine_p (chrec))
2952 return false;
2953
2954 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2955 if (chrec_contains_undetermined (nb_iter))
2956 return false;
2957
2958 #if 0
2959 /* TODO -- If the test is after the exit, we may decrease the number of
2960 iterations by one. */
2961 if (after_exit)
2962 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2963 #endif
2964
2965 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2966
2967 if (!chrec_is_positive (end_value, &value2))
2968 return false;
2969
2970 *value = value0;
2971 return value0 == value1;
2972
2973 case INTEGER_CST:
2974 switch (tree_int_cst_sgn (chrec))
2975 {
2976 case -1:
2977 *value = false;
2978 break;
2979 case 1:
2980 *value = true;
2981 break;
2982 default:
2983 return false;
2984 }
2985 return true;
2986
2987 default:
2988 return false;
2989 }
2990 }
2991
2992
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  /* VALUE0: sign of initial (CHREC_B) - CHREC_A; VALUE1/VALUE2: sign of
     CHREC_B's step in the respective branches below, all as computed by
     chrec_is_positive.  */
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Compute both access functions in a common signed type so the
     subtraction below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  /* DIFFERENCE = initial condition of CHREC_B - CHREC_A.  */
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      /* The sign of the difference is unknown: answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below the constant CHREC_A; an overlap needs
	     CHREC_B's step to be positive and to divide the gap.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      /* CHREC_A is hit in its only "iteration" 0; CHREC_B
			 reaches it after |difference| / step iterations.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* Mirror case: CHREC_B starts above CHREC_A, so an overlap needs
	     a negative step.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3200
3201 /* Helper recursive function for initializing the matrix A. Returns
3202 the initial value of CHREC. */
3203
3204 static tree
3205 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3206 {
3207 gcc_assert (chrec);
3208
3209 switch (TREE_CODE (chrec))
3210 {
3211 case POLYNOMIAL_CHREC:
3212 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3213 return chrec_dont_know;
3214 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3215 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3216
3217 case PLUS_EXPR:
3218 case MULT_EXPR:
3219 case MINUS_EXPR:
3220 {
3221 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3222 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3223
3224 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3225 }
3226
3227 CASE_CONVERT:
3228 {
3229 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3230 return chrec_convert (chrec_type (chrec), op, NULL);
3231 }
3232
3233 case BIT_NOT_EXPR:
3234 {
3235 /* Handle ~X as -1 - X. */
3236 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3237 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3238 build_int_cst (TREE_TYPE (chrec), -1), op);
3239 }
3240
3241 case INTEGER_CST:
3242 return chrec;
3243
3244 default:
3245 gcc_unreachable ();
3246 return NULL_TREE;
3247 }
3248 }
3249
/* NOTE(review): despite the name this is truncating division, which
   equals floor only when the quotient is non-negative — confirm that
   all uses satisfy this before relying on floor semantics.  */
#define FLOOR_DIV(x,y) ((x) / (y))
3251
3252 /* Solves the special case of the Diophantine equation:
3253 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3254
3255 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3256 number of iterations that loops X and Y run. The overlaps will be
3257 constructed as evolutions in dimension DIM. */
3258
3259 static void
3260 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3261 HOST_WIDE_INT step_a,
3262 HOST_WIDE_INT step_b,
3263 affine_fn *overlaps_a,
3264 affine_fn *overlaps_b,
3265 tree *last_conflicts, int dim)
3266 {
3267 if (((step_a > 0 && step_b > 0)
3268 || (step_a < 0 && step_b < 0)))
3269 {
3270 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3271 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3272
3273 gcd_steps_a_b = gcd (step_a, step_b);
3274 step_overlaps_a = step_b / gcd_steps_a_b;
3275 step_overlaps_b = step_a / gcd_steps_a_b;
3276
3277 if (niter > 0)
3278 {
3279 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3280 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3281 last_conflict = tau2;
3282 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3283 }
3284 else
3285 *last_conflicts = chrec_dont_know;
3286
3287 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3288 build_int_cst (NULL_TREE,
3289 step_overlaps_a));
3290 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3291 build_int_cst (NULL_TREE,
3292 step_overlaps_b));
3293 }
3294
3295 else
3296 {
3297 *overlaps_a = affine_fn_cst (integer_zero_node);
3298 *overlaps_b = affine_fn_cst (integer_zero_node);
3299 *last_conflicts = integer_zero_node;
3300 }
3301 }
3302
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the loop of CHREC_A's outer evolution, Y the loop of its
     inner evolution, Z the loop of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three iteration counts must be known.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve three univariate sub-problems: X vs. Z, Y vs. Z, and
     (X+Y) vs. Z, each in its own overlap dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the conflicting sub-solutions into OVA1/OVA2/OVB.  Note
	 that when several cases apply the later assignments to
	 *LAST_CONFLICTS win, so the XYZ case takes precedence.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* Release the temporary sub-solutions.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3431
3432 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3433
3434 static void
3435 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3436 int size)
3437 {
3438 memcpy (vec2, vec1, size * sizeof (*vec1));
3439 }
3440
3441 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3442
3443 static void
3444 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3445 int m, int n)
3446 {
3447 int i;
3448
3449 for (i = 0; i < m; i++)
3450 lambda_vector_copy (mat1[i], mat2[i], n);
3451 }
3452
3453 /* Store the N x N identity matrix in MAT. */
3454
3455 static void
3456 lambda_matrix_id (lambda_matrix mat, int size)
3457 {
3458 int i, j;
3459
3460 for (i = 0; i < size; i++)
3461 for (j = 0; j < size; j++)
3462 mat[i][j] = (i == j) ? 1 : 0;
3463 }
3464
3465 /* Return the index of the first nonzero element of vector VEC1 between
3466 START and N. We must have START <= N.
3467 Returns N if VEC1 is the zero vector. */
3468
3469 static int
3470 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3471 {
3472 int j = start;
3473 while (j < n && vec1[j] == 0)
3474 j++;
3475 return j;
3476 }
3477
3478 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3479 R2 = R2 + CONST1 * R1. */
3480
3481 static void
3482 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3483 lambda_int const1)
3484 {
3485 int i;
3486
3487 if (const1 == 0)
3488 return;
3489
3490 for (i = 0; i < n; i++)
3491 mat[r2][i] += const1 * mat[r1][i];
3492 }
3493
3494 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3495 and store the result in VEC2. */
3496
3497 static void
3498 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3499 int size, lambda_int const1)
3500 {
3501 int i;
3502
3503 if (const1 == 0)
3504 lambda_vector_clear (vec2, size);
3505 else
3506 for (i = 0; i < size; i++)
3507 vec2[i] = const1 * vec1[i];
3508 }
3509
3510 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3511
3512 static void
3513 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3514 int size)
3515 {
3516 lambda_vector_mult_const (vec1, vec2, size, -1);
3517 }
3518
3519 /* Negate row R1 of matrix MAT which has N columns. */
3520
3521 static void
3522 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3523 {
3524 lambda_vector_negate (mat[r1], mat[r1], n);
3525 }
3526
3527 /* Return true if two vectors are equal. */
3528
3529 static bool
3530 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3531 {
3532 int i;
3533 for (i = 0; i < size; i++)
3534 if (vec1[i] != vec2[i])
3535 return false;
3536 return true;
3537 }
3538
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start with S = A and U = identity; every row operation below is
     applied to both, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      /* NOTE(review): S[j] is row J but is scanned over M entries;
	 this appears to rely on the rows of S being allocated
	 contiguously — confirm against lambda_matrix_new before
	 changing.  */
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate the remaining entries of the current column with
	     a Euclidean-style sequence of row combinations and swaps:
	     each step replaces the pair (a, b) by (b, a mod b).  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* FACTOR = trunc (a / b), computed on magnitudes with
		     the sign carried separately in SIGMA.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  /* Mirror the same operation on U to keep U.A = S.  */
		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
3583
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  /* All scratch matrices live on this obstack, freed on every exit
     path at END_ANALYZE_SUBS_AA.  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* A receives the steps of CHREC_A (coefficient 1) followed by the
     steps of CHREC_B (negated); the returned values are the initial
     conditions.  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the constant difference of the initial conditions.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading entry of S is positive.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME:  The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  /* Only emit a constant when it fits the node type;
		     otherwise fall back to "don't know".  */
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3873
3874 /* Returns true when analyze_subscript_affine_affine can be used for
3875 determining the dependence relation between chrec_a and chrec_b,
3876 that contain symbols. This function modifies chrec_a and chrec_b
3877 such that the analysis result is the same, and such that they don't
3878 contain symbols, and then can safely be passed to the analyzer.
3879
3880 Example: The analysis of the following tuples of evolutions produce
3881 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3882 vs. {0, +, 1}_1
3883
3884 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3885 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3886 */
3887
3888 static bool
3889 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3890 {
3891 tree diff, type, left_a, left_b, right_b;
3892
3893 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3894 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3895 /* FIXME: For the moment not handled. Might be refined later. */
3896 return false;
3897
3898 type = chrec_type (*chrec_a);
3899 left_a = CHREC_LEFT (*chrec_a);
3900 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3901 diff = chrec_fold_minus (type, left_a, left_b);
3902
3903 if (!evolution_function_is_constant_p (diff))
3904 return false;
3905
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3908
3909 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3910 diff, CHREC_RIGHT (*chrec_a));
3911 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3912 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3913 build_int_cst (type, 0),
3914 right_b);
3915 return true;
3916 }
3917
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: delegate to the cst-affine analyzer, in
     either argument order.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs: try rewriting them into symbol-free
	 equivalents; on success CHREC_A and CHREC_B have been
	 modified in place.  */
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
      /* Note: the label below is also the target of the GOTO above,
	 jumping here from inside the affine-affine arm.  */
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4001
4002 /* Returns false if we can prove that the greatest common divisor of the steps
4003 of CHREC does not divide CST, false otherwise. */
4004
4005 static bool
4006 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4007 {
4008 HOST_WIDE_INT cd = 0, val;
4009 tree step;
4010
4011 if (!tree_fits_shwi_p (cst))
4012 return true;
4013 val = tree_to_shwi (cst);
4014
4015 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4016 {
4017 step = CHREC_RIGHT (chrec);
4018 if (!tree_fits_shwi_p (step))
4019 return true;
4020 cd = gcd (cd, tree_to_shwi (step));
4021 chrec = CHREC_LEFT (chrec);
4022 }
4023
4024 return val % cd == 0;
4025 }
4026
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   *LAST_CONFLICTS receives a count of conflicting iterations (or
   chrec_dont_know / zero in the unknown / independent cases).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Convert both chrecs to a common signed type so that the
     subtraction below is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      /* Classify the result for the statistics only; the conflict
	 functions themselves were filled in by the call above.  */
      if (CF_NOT_KNOWN_P (*overlaps_a)
 	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4127
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).

   Dispatches on the shape of the two access functions to the ZIV, SIV
   or MIV analyzer; the earlier branches handle the degenerate cases
   (undetermined inputs, identical affine functions, non-affine
   symbolic functions).  */

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Missing or undetermined access functions: answer "don't know".  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Zero Index Variable: neither function varies in the nest.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  /* Single Index Variable.  */
  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  /* Multiple Index Variables.  */
  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4218
4219 /* Helper function for uniquely inserting distance vectors. */
4220
4221 static void
4222 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4223 {
4224 unsigned i;
4225 lambda_vector v;
4226
4227 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4228 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4229 return;
4230
4231 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4232 }
4233
4234 /* Helper function for uniquely inserting direction vectors. */
4235
4236 static void
4237 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4238 {
4239 unsigned i;
4240 lambda_vector v;
4241
4242 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4243 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4244 return;
4245
4246 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4247 }
4248
4249 /* Add a distance of 1 on all the loops outer than INDEX. If we
4250 haven't yet determined a distance for this outer loop, push a new
4251 distance vector composed of the previous distance, and a distance
4252 of 1 for this outer loop. Example:
4253
4254 | loop_1
4255 | loop_2
4256 | A[10]
4257 | endloop_2
4258 | endloop_1
4259
4260 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4261 save (0, 1), then we have to save (1, 0). */
4262
4263 static void
4264 add_outer_distances (struct data_dependence_relation *ddr,
4265 lambda_vector dist_v, int index)
4266 {
4267 /* For each outer loop where init_v is not set, the accesses are
4268 in dependence of distance 1 in the loop. */
4269 while (--index >= 0)
4270 {
4271 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4272 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4273 save_v[index] = 1;
4274 save_dist_v (ddr, save_v);
4275 }
4276 }
4277
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V[k] records whether DIST_V[k] has been set by some
     subscript, so a later subscript can be checked against it.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* A symbolic (non-integer) distance cannot be represented in a
	 classic distance vector.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4369
4370 /* Return true when the DDR contains only constant access functions. */
4371
4372 static bool
4373 constant_access_functions (const struct data_dependence_relation *ddr)
4374 {
4375 unsigned i;
4376 subscript *sub;
4377
4378 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4379 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4380 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4381 return false;
4382
4383 return true;
4384 }
4385
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a two-level polynomial chrec
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two loop variables in the loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  /* Reduce to the smallest integer solution.  */
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so that the component in the outer loop X_1 is
     positive.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
4433
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distance vectors for DDR.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Innermost loop index that carries the dependence; starts past the
     last loop and is lowered as subscripts are examined.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* The access functions are the same on both sides, so looking
	 at side 0 is enough.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate chrecs are only handled for the single
		 subscript case.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record a zero vector, with a distance of 1 in every loop outer
     than INDEX_CARRY.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4493
4494 static void
4495 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4496 {
4497 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4498
4499 dist_v[0] = 1;
4500 save_dist_v (ddr, dist_v);
4501 }
4502
4503 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4504 is the case for example when access functions are the same and
4505 equal to a constant, as in:
4506
4507 | loop_1
4508 | A[3] = ...
4509 | ... = A[3]
4510 | endloop_1
4511
4512 in which case the distance vectors are (0) and (1). */
4513
4514 static void
4515 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4516 {
4517 unsigned i, j;
4518
4519 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4520 {
4521 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4522 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4523 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4524
4525 for (j = 0; j < ca->n; j++)
4526 if (affine_function_zero_p (ca->fns[j]))
4527 {
4528 insert_innermost_unit_dist_vector (ddr);
4529 return;
4530 }
4531
4532 for (j = 0; j < cb->n; j++)
4533 if (affine_function_zero_p (cb->fns[j]))
4534 {
4535 insert_innermost_unit_dist_vector (ddr);
4536 return;
4537 }
4538 }
4539 }
4540
4541 /* Return true when the DDR contains two data references that have the
4542 same access functions. */
4543
4544 static inline bool
4545 same_access_functions (const struct data_dependence_relation *ddr)
4546 {
4547 unsigned i;
4548 subscript *sub;
4549
4550 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4551 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4552 SUB_ACCESS_FN (sub, 1)))
4553 return false;
4554
4555 return true;
4556 }
4557
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation has already been finalized
     (chrec_known or chrec_dont_know).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  /* Re-run the subscript tests with the references swapped
	     and record the reversed vector instead.  */
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
 	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the vector for the opposite ordering of
		 the two references, and the distances for the outer
		 loops.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4708
4709 /* Return the direction for a given distance.
4710 FIXME: Computing dir this way is suboptimal, since dir can catch
4711 cases that dist is unable to represent. */
4712
4713 static inline enum data_dependence_direction
4714 dir_from_dist (int dist)
4715 {
4716 if (dist > 0)
4717 return dir_positive;
4718 else if (dist < 0)
4719 return dir_negative;
4720 else
4721 return dir_equal;
4722 }
4723
4724 /* Compute the classic per loop direction vector. DDR is the data
4725 dependence relation to build a vector from. */
4726
4727 static void
4728 build_classic_dir_vector (struct data_dependence_relation *ddr)
4729 {
4730 unsigned i, j;
4731 lambda_vector dist_v;
4732
4733 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4734 {
4735 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4736
4737 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4738 dir_v[j] = dir_from_dist (dist_v[j]);
4739
4740 save_dir_v (ddr, dir_v);
4741 }
4742 }
4743
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   As a side effect, fills in the conflict functions and last-conflict
   count of every subscript of DDR, and finalizes DDR when independence
   or an undetermined result was established.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE while no subscript has forced a conclusion; becomes
     chrec_dont_know or chrec_known otherwise.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous run before
	 storing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* All subscripts may conflict: report a dependence.  */
  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4805
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* subscript_dependence_tester_1 returns true when a dependence
     between DDR_A and DDR_B could not be ruled out.  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Summarize the per-subscript conflicts as classic distance and
     direction vectors when that representation is possible.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4819
4820 /* Returns true when all the access functions of A are affine or
4821 constant with respect to LOOP_NEST. */
4822
4823 static bool
4824 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4825 const class loop *loop_nest)
4826 {
4827 unsigned int i;
4828 vec<tree> fns = DR_ACCESS_FNS (a);
4829 tree t;
4830
4831 FOR_EACH_VEC_ELT (fns, i, t)
4832 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4833 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4834 return false;
4835
4836 return true;
4837 }
4838
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester only handles affine or constant access
	 functions.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4902
4903 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4904 the data references in DATAREFS, in the LOOP_NEST. When
4905 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4906 relations. Return true when successful, i.e. data references number
4907 is small enough to be handled. */
4908
4909 bool
4910 compute_all_dependences (vec<data_reference_p> datarefs,
4911 vec<ddr_p> *dependence_relations,
4912 vec<loop_p> loop_nest,
4913 bool compute_self_and_rr)
4914 {
4915 struct data_dependence_relation *ddr;
4916 struct data_reference *a, *b;
4917 unsigned int i, j;
4918
4919 if ((int) datarefs.length ()
4920 > param_loop_max_datarefs_for_datadeps)
4921 {
4922 struct data_dependence_relation *ddr;
4923
4924 /* Insert a single relation into dependence_relations:
4925 chrec_dont_know. */
4926 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4927 dependence_relations->safe_push (ddr);
4928 return false;
4929 }
4930
4931 FOR_EACH_VEC_ELT (datarefs, i, a)
4932 for (j = i + 1; datarefs.iterate (j, &b); j++)
4933 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4934 {
4935 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4936 dependence_relations->safe_push (ddr);
4937 if (loop_nest.exists ())
4938 compute_affine_dependence (ddr, loop_nest[0]);
4939 }
4940
4941 if (compute_self_and_rr)
4942 FOR_EACH_VEC_ELT (datarefs, i, a)
4943 {
4944 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4945 dependence_relations->safe_push (ddr);
4946 if (loop_nest.exists ())
4947 compute_affine_dependence (ddr, loop_nest[0]);
4948 }
4949
4950 return true;
4951 }
4952
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference tree (e.g. a DECL or a REFERENCE_CLASS_P
     expression found in a statement).  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
4968
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only the simd loop the lane call belongs to is
		 exempt; elsewhere treat it as a clobber.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled as explicit references further below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Statements without a virtual use do not reference memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a declaration or a
	 reference with an analyzable (non-SSA, non-invariant) base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Reconstruct a MEM_REF for the masked access from the
	       call's pointer argument and the encoded alignment.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* The access only happens for active mask lanes.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record each memory-referencing argument as a read.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of an assignment or a call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5100
5101
5102 /* Returns true if the loop-nest has any data reference. */
5103
5104 bool
5105 loop_nest_has_data_refs (loop_p loop)
5106 {
5107 basic_block *bbs = get_loop_body (loop);
5108 auto_vec<data_ref_loc, 3> references;
5109
5110 for (unsigned i = 0; i < loop->num_nodes; i++)
5111 {
5112 basic_block bb = bbs[i];
5113 gimple_stmt_iterator bsi;
5114
5115 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5116 {
5117 gimple *stmt = gsi_stmt (bsi);
5118 get_references_in_stmt (stmt, &references);
5119 if (references.length ())
5120 {
5121 free (bbs);
5122 return true;
5123 }
5124 }
5125 }
5126 free (bbs);
5127 return false;
5128 }
5129
5130 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5131 reference, returns false, otherwise returns true. NEST is the outermost
5132 loop of the loop nest in which the references should be analyzed. */
5133
5134 opt_result
5135 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5136 vec<data_reference_p> *datarefs)
5137 {
5138 unsigned i;
5139 auto_vec<data_ref_loc, 2> references;
5140 data_ref_loc *ref;
5141 data_reference_p dr;
5142
5143 if (get_references_in_stmt (stmt, &references))
5144 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5145 stmt);
5146
5147 FOR_EACH_VEC_ELT (references, i, ref)
5148 {
5149 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5150 loop_containing_stmt (stmt), ref->ref,
5151 stmt, ref->is_read, ref->is_conditional_in_stmt);
5152 gcc_assert (dr != NULL);
5153 datarefs->safe_push (dr);
5154 }
5155
5156 return opt_result::success ();
5157 }
5158
5159 /* Stores the data references in STMT to DATAREFS. If there is an
5160 unanalyzable reference, returns false, otherwise returns true.
5161 NEST is the outermost loop of the loop nest in which the references
5162 should be instantiated, LOOP is the loop in which the references
5163 should be analyzed. */
5164
5165 bool
5166 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5167 vec<data_reference_p> *datarefs)
5168 {
5169 unsigned i;
5170 auto_vec<data_ref_loc, 2> references;
5171 data_ref_loc *ref;
5172 bool ret = true;
5173 data_reference_p dr;
5174
5175 if (get_references_in_stmt (stmt, &references))
5176 return false;
5177
5178 FOR_EACH_VEC_ELT (references, i, ref)
5179 {
5180 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5181 ref->is_conditional_in_stmt);
5182 gcc_assert (dr != NULL);
5183 datarefs->safe_push (dr);
5184 }
5185
5186 return ret;
5187 }
5188
5189 /* Search the data references in LOOP, and record the information into
5190 DATAREFS. Returns chrec_dont_know when failing to analyze a
5191 difficult case, returns NULL_TREE otherwise. */
5192
5193 tree
5194 find_data_references_in_bb (class loop *loop, basic_block bb,
5195 vec<data_reference_p> *datarefs)
5196 {
5197 gimple_stmt_iterator bsi;
5198
5199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5200 {
5201 gimple *stmt = gsi_stmt (bsi);
5202
5203 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5204 {
5205 struct data_reference *res;
5206 res = XCNEW (struct data_reference);
5207 datarefs->safe_push (res);
5208
5209 return chrec_dont_know;
5210 }
5211 }
5212
5213 return NULL_TREE;
5214 }
5215
5216 /* Search the data references in LOOP, and record the information into
5217 DATAREFS. Returns chrec_dont_know when failing to analyze a
5218 difficult case, returns NULL_TREE otherwise.
5219
5220 TODO: This function should be made smarter so that it can handle address
5221 arithmetic as if they were array accesses, etc. */
5222
5223 tree
5224 find_data_references_in_loop (class loop *loop,
5225 vec<data_reference_p> *datarefs)
5226 {
5227 basic_block bb, *bbs;
5228 unsigned int i;
5229
5230 bbs = get_loop_body_in_dom_order (loop);
5231
5232 for (i = 0; i < loop->num_nodes; i++)
5233 {
5234 bb = bbs[i];
5235
5236 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5237 {
5238 free (bbs);
5239 return chrec_dont_know;
5240 }
5241 }
5242 free (bbs);
5243
5244 return NULL_TREE;
5245 }
5246
5247 /* Return the alignment in bytes that DRB is guaranteed to have at all
5248 times. */
5249
5250 unsigned int
5251 dr_alignment (innermost_loop_behavior *drb)
5252 {
5253 /* Get the alignment of BASE_ADDRESS + INIT. */
5254 unsigned int alignment = drb->base_alignment;
5255 unsigned int misalignment = (drb->base_misalignment
5256 + TREE_INT_CST_LOW (drb->init));
5257 if (misalignment != 0)
5258 alignment = MIN (alignment, misalignment & -misalignment);
5259
5260 /* Cap it to the alignment of OFFSET. */
5261 if (!integer_zerop (drb->offset))
5262 alignment = MIN (alignment, drb->offset_alignment);
5263
5264 /* Cap it to the alignment of STEP. */
5265 if (!integer_zerop (drb->step))
5266 alignment = MIN (alignment, drb->step_alignment);
5267
5268 return alignment;
5269 }
5270
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  Return NULL_TREE when no
   such object can be determined.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names are handled.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  Start from the maximum alignment any object file
     can provide.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      /* Each step can only preserve the alignment of its largest
	 power-of-two factor.  */
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Give up when there is no base or when the bit position is not a
     known multiple of a byte.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      /* A variable offset contributes the alignment of its largest
	 power-of-two factor.  */
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5328
5329 /* Return the object whose alignment would need to be changed in order
5330 to increase the alignment of ADDR. Store the maximum achievable
5331 alignment in *MAX_ALIGNMENT. */
5332
5333 tree
5334 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5335 {
5336 tree base = get_base_for_alignment_1 (addr, max_alignment);
5337 if (base)
5338 return base;
5339
5340 if (TREE_CODE (addr) == ADDR_EXPR)
5341 addr = TREE_OPERAND (addr, 0);
5342 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5343 return addr;
5344 }
5345
5346 /* Recursive helper function. */
5347
5348 static bool
5349 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5350 {
5351 /* Inner loops of the nest should not contain siblings. Example:
5352 when there are two consecutive loops,
5353
5354 | loop_0
5355 | loop_1
5356 | A[{0, +, 1}_1]
5357 | endloop_1
5358 | loop_2
5359 | A[{0, +, 1}_2]
5360 | endloop_2
5361 | endloop_0
5362
5363 the dependence relation cannot be captured by the distance
5364 abstraction. */
5365 if (loop->next)
5366 return false;
5367
5368 loop_nest->safe_push (loop);
5369 if (loop->inner)
5370 return find_loop_nest_1 (loop->inner, loop_nest);
5371 return true;
5372 }
5373
5374 /* Return false when the LOOP is not well nested. Otherwise return
5375 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5376 contain the loops from the outermost to the innermost, as they will
5377 appear in the classic distance vector. */
5378
5379 bool
5380 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5381 {
5382 loop_nest->safe_push (loop);
5383 if (loop->inner)
5384 return find_loop_nest_1 (loop->inner, loop_nest);
5385 return true;
5386 }
5387
5388 /* Returns true when the data dependences have been computed, false otherwise.
5389 Given a loop nest LOOP, the following vectors are returned:
5390 DATAREFS is initialized to all the array elements contained in this loop,
5391 DEPENDENCE_RELATIONS contains the relations between the data references.
5392 Compute read-read and self relations if
5393 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5394
5395 bool
5396 compute_data_dependences_for_loop (class loop *loop,
5397 bool compute_self_and_read_read_dependences,
5398 vec<loop_p> *loop_nest,
5399 vec<data_reference_p> *datarefs,
5400 vec<ddr_p> *dependence_relations)
5401 {
5402 bool res = true;
5403
5404 memset (&dependence_stats, 0, sizeof (dependence_stats));
5405
5406 /* If the loop nest is not well formed, or one of the data references
5407 is not computable, give up without spending time to compute other
5408 dependences. */
5409 if (!loop
5410 || !find_loop_nest (loop, loop_nest)
5411 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5412 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5413 compute_self_and_read_read_dependences))
5414 res = false;
5415
5416 if (dump_file && (dump_flags & TDF_STATS))
5417 {
5418 fprintf (dump_file, "Dependence tester statistics:\n");
5419
5420 fprintf (dump_file, "Number of dependence tests: %d\n",
5421 dependence_stats.num_dependence_tests);
5422 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5423 dependence_stats.num_dependence_dependent);
5424 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5425 dependence_stats.num_dependence_independent);
5426 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5427 dependence_stats.num_dependence_undetermined);
5428
5429 fprintf (dump_file, "Number of subscript tests: %d\n",
5430 dependence_stats.num_subscript_tests);
5431 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5432 dependence_stats.num_subscript_undetermined);
5433 fprintf (dump_file, "Number of same subscript function: %d\n",
5434 dependence_stats.num_same_subscript_function);
5435
5436 fprintf (dump_file, "Number of ziv tests: %d\n",
5437 dependence_stats.num_ziv);
5438 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5439 dependence_stats.num_ziv_dependent);
5440 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5441 dependence_stats.num_ziv_independent);
5442 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5443 dependence_stats.num_ziv_unimplemented);
5444
5445 fprintf (dump_file, "Number of siv tests: %d\n",
5446 dependence_stats.num_siv);
5447 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5448 dependence_stats.num_siv_dependent);
5449 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5450 dependence_stats.num_siv_independent);
5451 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5452 dependence_stats.num_siv_unimplemented);
5453
5454 fprintf (dump_file, "Number of miv tests: %d\n",
5455 dependence_stats.num_miv);
5456 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5457 dependence_stats.num_miv_dependent);
5458 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5459 dependence_stats.num_miv_independent);
5460 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5461 dependence_stats.num_miv_unimplemented);
5462 }
5463
5464 return res;
5465 }
5466
5467 /* Free the memory used by a data dependence relation DDR. */
5468
5469 void
5470 free_dependence_relation (struct data_dependence_relation *ddr)
5471 {
5472 if (ddr == NULL)
5473 return;
5474
5475 if (DDR_SUBSCRIPTS (ddr).exists ())
5476 free_subscripts (DDR_SUBSCRIPTS (ddr));
5477 DDR_DIST_VECTS (ddr).release ();
5478 DDR_DIR_VECTS (ddr).release ();
5479
5480 free (ddr);
5481 }
5482
5483 /* Free the memory used by the data dependence relations from
5484 DEPENDENCE_RELATIONS. */
5485
5486 void
5487 free_dependence_relations (vec<ddr_p> dependence_relations)
5488 {
5489 unsigned int i;
5490 struct data_dependence_relation *ddr;
5491
5492 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5493 if (ddr)
5494 free_dependence_relation (ddr);
5495
5496 dependence_relations.release ();
5497 }
5498
5499 /* Free the memory used by the data references from DATAREFS. */
5500
5501 void
5502 free_data_refs (vec<data_reference_p> datarefs)
5503 {
5504 unsigned int i;
5505 struct data_reference *dr;
5506
5507 FOR_EACH_VEC_ELT (datarefs, i, dr)
5508 free_data_ref (dr);
5509 datarefs.release ();
5510 }
5511
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 [MINV, MAXV] is the range of unscaled values whose product
	 with FACTOR still fits in ssizetype.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no range
	 information is available.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  /* Scaling by FACTOR cannot overflow, so the sign of the
	     scaled step matches the sign of the unscaled one.  */
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No simplification applies; return the original step.  */
  return DR_STEP (dr);
}
5571
5572 /* Return a value that is negative iff DR has a negative step. */
5573
5574 tree
5575 dr_direction_indicator (struct data_reference *dr)
5576 {
5577 return dr_step_indicator (dr, 0);
5578 }
5579
5580 /* Return a value that is zero iff DR has a zero step. */
5581
5582 tree
5583 dr_zero_step_indicator (struct data_reference *dr)
5584 {
5585 return dr_step_indicator (dr, 1);
5586 }
5587
5588 /* Return true if DR is known to have a nonnegative (but possibly zero)
5589 step. */
5590
5591 bool
5592 dr_known_forward_stride_p (struct data_reference *dr)
5593 {
5594 tree indicator = dr_direction_indicator (dr);
5595 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5596 fold_convert (ssizetype, indicator),
5597 ssize_int (0));
5598 return neg_step_val && integer_zerop (neg_step_val);
5599 }