/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */

/* This pass walks a given loop structure searching for array
   references. The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).

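   For instance (an illustrative example): for the accesses A[i] and
   A[i + 1] in a loop number 1, the access functions are the chrecs
   {0, +, 1}_1 and {1, +, 1}_1, and the same element of A is accessed
   at iterations x and y exactly when x == y + 1.
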
   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
     composed of the access functions for a given dimension. Example:
     Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
     (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
     solutions are integer constants, for example the equation
     | 3*x + 2*y = 1
     has an integer solution x = 1 and y = -1.
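
     In dependence testing such equations arise by equating two affine
     access functions: for instance, testing A[3*i] against A[2*j + 1]
     leads to the Diophantine equation 3*i - 2*j = 1.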

   References:

66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
     by Utpal Banerjee.


*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);
/* Returns true iff A divides B. */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B. */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access. */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}


/* Dump into FILE all the data references from DATAREFS. */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified debug dump of all the data references in REF to stderr. */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS. */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR. */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure. */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "# stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "# ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "# base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "# Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure. */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF. */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF. */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure. */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF. */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, " +");
          break;
        case dir_negative:
          fprintf (outf, " -");
          break;
        case dir_equal:
          fprintf (outf, " =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, " +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, " +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, " -=");
          break;
        case dir_star:
          fprintf (outf, " *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors. */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE. */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors. */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure. */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, " (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, " (nil)\n");
        }
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, " access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, " access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, " distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, " direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version. */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS. */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS. */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE. DDRS contains
   the dependence relations; the length of each vector is the number
   of loops in the considered nest. */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE. */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B. */
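
/* For example (an illustrative sketch, not part of the interface):
   with TYPE = unsigned char, OP0 = 200, OP1 = 100 and CODE = PLUS_EXPR,
   (sizetype) 200 + (sizetype) 100 == 300, whereas
   (sizetype) (unsigned char) (200 + 100) == 44, so DELTA == 256: the
   addition overflows the 8-bit TYPE exactly once. */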

static bool
compute_distributive_range (tree type, value_range &op0_range,
                            tree_code code, value_range &op1_range,
                            tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code, type);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
        result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero. */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero. */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis. */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code, ssizetype);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE. We can only calculate DELTA if the numbers match. */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow. The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB). */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
   FROM_TYPE are integral types. */
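
/* For instance (illustrative): a conversion from unsigned char to any
   wider integral type is always such a nop, while a narrowing such as
   int -> unsigned char qualifies only when RANGE is known to fit in
   unsigned char. */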

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype. */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare. */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}

static void
split_constant_offset (tree type, tree *var, tree *off,
                       value_range *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset. If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success. A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited. LIMIT counts down the number of SSA names that we are
   allowed to process before giving up. */
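
/* A sketch of the MULT_EXPR case below (illustrative only): splitting
   (i + 3) * 4 with integral TYPE recurses into i + 3, yielding
   {(sizetype) i, 3}, and produces *VAR = (sizetype) i * 4 and
   *OFF = 12, provided compute_distributive_range can show that
   distributing the multiplication cannot change the value. */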

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, value_range *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, since otherwise
           casts might be converted into ARRAY_REFs in gimplify_conversion,
           and to compute that ARRAY_REF's element size, TYPE_SIZE_UNIT,
           which possibly no longer appears in the current GIMPLE, might
           resurface. This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in the element size of any ARRAY_REF
               // found within var0 from the corresponding ARRAY_REF
               // embedded in op0; if unsuccessful, just punt.
             } */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code. */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case. */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case. */
        return res;
      }
    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type. In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type. In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type. In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype). Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited. LIMIT counts down the number of SSA names that we are
   allowed to process before giving up. */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
        {
          value_range vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          wide_int var_min = wi::to_wide (vr.min ());
          wide_int var_max = wi::to_wide (vr.max ());
          value_range_kind vr_kind = vr.kind ();
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING. The new code normalizes
             full-domain ranges to VR_VARYING. */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            *exp_range = value_range (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
   type as EXP while OFF has type ssizetype. */
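
/* For instance (an illustrative sketch): for EXP == &a[i + 4] with
   4-byte array elements, *VAR becomes &a[i] and *OFF becomes 16; for
   an integral EXP == n + 3, *VAR is n and *OFF is 3. */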

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

/* Returns the address ADDR of an object in a canonical shape (without
   nop casts, and with the type of a pointer to the object). */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast. */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis. In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis. In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction. The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop. In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses. */
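
/* For example (an illustrative sketch): for a[i + 7] with 4-byte
   elements, loop analysis wrt the loop over i gives init == 28 and
   step == 4, whereas BB analysis keeps the variable i * 4 term in
   the offset and gives step == 0. */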

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference. */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes. */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted. */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted. */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above. */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes. */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function. This accepts a subset of what handled_component_p accepts. */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can be the base of a component reference that
   satisfies access_fn_component_p. */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory
   reference REF, analyzed in LOOP and instantiated before NEST, and
   stores them in *DRI. */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions. */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index. The base is
     then just the immediate underlying object. */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p. */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2. */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing. We have determined
           our base object in this case. */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function. */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable. */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle). */
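          /* For instance (illustrative numbers): with off == 7 and a
             4-byte access type, rem == 3, so off becomes 4 and the
             remaining 3 bytes stay in memoff. */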
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero. */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition. */
          access_fn = chrec_replace_initial_condition
            (access_fn, fold_convert (orig_type, off));
          /* ??? This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as a whole. With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p. */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR. */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR. */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF. NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed. */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function that computes the order between two tree
   expressions T1 and T2. It is used in comparator functions when
   sorting objects based on the order of tree expressions.
   Returns -1, 0, or 1. */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs. */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively. */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a
   runtime alias check. */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution. */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs are the
   same, so that we can consider combining the aliasing checks of the
   two pairs of data-dependent data refs. */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan. */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same base address and step. Therefore, if either a & c or
     b & d don't share the same address and step, we don't care about
     the order of those two pairs after sorting. */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations that each data reference is
   accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)

   for aliasing checks. However, in some cases we can decrease the number
   of checks by combining two checks into one. For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

     load_ptr_0 < load_ptr_1 &&
     load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means that, in each iteration of the vectorized loop,
   the accessed memory of store_ptr_0 cannot be between the memory of
   load_ptr_0 and load_ptr_1.)

   we can then use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)

   Note that we only consider that load_ptr_0 and load_ptr_1 have the
   same base address. */
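
/* For instance (illustrative numbers): if load_ptr_1 == load_ptr_0 + 16,
   load_segment_length_0 == 16 and store_segment_length_0 == 64, the
   second condition above is 16 - 16 < 64, which holds, so the two
   checks against store_ptr_0 can be combined into one. */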

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
                               poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree. This allows the loop below to merge more
     cases. */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
                                            DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
        {
          std::swap (alias_pair->first, alias_pair->second);
          alias_pair->flags |= DR_ALIAS_SWAPPED;
        }
      else
        alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks. */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs. */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs. */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          continue;
        }

      /* Assume that we won't be able to merge the pairs, then correct
         if we do. */
      last += 1;
      if (last != i)
        (*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
        {
1851 /* We consider the case in which DR_B1 and DR_B2 are the same memref
1852 and DR_A1 and DR_A2 are two consecutive memrefs. */
1853 if (*dr_a1 == *dr_a2)
1854 {
1855 std::swap (dr_a1, dr_b1);
1856 std::swap (dr_a2, dr_b2);
1857 }
1858
1859 poly_int64 init_a1, init_a2;
1860 /* Only consider cases in which the distance between the initial
1861 DR_A1 and the initial DR_A2 is known at compile time. */
1862 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1863 DR_BASE_ADDRESS (dr_a2->dr), 0)
1864 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1865 DR_OFFSET (dr_a2->dr), 0)
1866 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1867 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1868 continue;
1869
1870 /* Don't combine if we can't tell which one comes first. */
1871 if (!ordered_p (init_a1, init_a2))
1872 continue;
1873
1874 /* Work out what the segment length would be if we did combine
1875 DR_A1 and DR_A2:
1876
1877 - If DR_A1 and DR_A2 have equal lengths, that length is
1878 also the combined length.
1879
1880 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1881 length is the lower bound on those lengths.
1882
1883 - If DR_A1 and DR_A2 both have positive lengths, the combined
1884 length is the upper bound on those lengths.
1885
1886 Other cases are unlikely to give a useful combination.
1887
1888 The lengths both have sizetype, so the sign is taken from
1889 the step instead. */
1890 poly_uint64 new_seg_len = 0;
1891 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1892 dr_a2->seg_len, 0);
1893 if (new_seg_len_p)
1894 {
1895 poly_uint64 seg_len_a1, seg_len_a2;
1896 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1897 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1898 continue;
1899
1900 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1901 if (TREE_CODE (indicator_a) != INTEGER_CST)
1902 continue;
1903
1904 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1905 if (TREE_CODE (indicator_b) != INTEGER_CST)
1906 continue;
1907
1908 int sign_a = tree_int_cst_sgn (indicator_a);
1909 int sign_b = tree_int_cst_sgn (indicator_b);
1910
1911 if (sign_a <= 0 && sign_b <= 0)
1912 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1913 else if (sign_a >= 0 && sign_b >= 0)
1914 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1915 else
1916 continue;
1917 }
1918 /* At this point we're committed to merging the refs. */
1919
1920 /* Make sure dr_a1 starts left of dr_a2. */
1921 if (maybe_gt (init_a1, init_a2))
1922 {
1923 std::swap (*dr_a1, *dr_a2);
1924 std::swap (init_a1, init_a2);
1925 }
1926
1927 /* The DR_Bs are equal, so only the DR_As can introduce
1928 mixed steps. */
1929 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1930 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1931
1932 if (new_seg_len_p)
1933 {
1934 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1935 new_seg_len);
1936 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1937 }
1938
1939 /* This is always positive due to the swap above. */
1940 poly_uint64 diff = init_a2 - init_a1;
1941
1942 /* The new check will start at DR_A1. Make sure that its access
1943 size encompasses the initial DR_A2. */
1944 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1945 {
1946 dr_a1->access_size = upper_bound (dr_a1->access_size,
1947 diff + dr_a2->access_size);
1948 unsigned int new_align = known_alignment (dr_a1->access_size);
1949 dr_a1->align = MIN (dr_a1->align, new_align);
1950 }
1951 if (dump_enabled_p ())
1952 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1953 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1954 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1955 alias_pair1->flags |= alias_pair2->flags;
1956 last -= 1;
1957 }
1958 }
1959 alias_pairs->truncate (last + 1);
1960
1961 /* Try to restore the original dr_with_seg_len order within each
1962 dr_with_seg_len_pair_t. If we ended up combining swapped and
1963 unswapped pairs into the same check, we have to invalidate any
1964 RAW, WAR and WAW information for it. */
1965 if (dump_enabled_p ())
1966 dump_printf (MSG_NOTE, "merged alias checks:\n");
1967 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1968 {
1969 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1970 unsigned int swapped = (alias_pair->flags & swap_mask);
1971 if (swapped == DR_ALIAS_SWAPPED)
1972 std::swap (alias_pair->first, alias_pair->second);
1973 else if (swapped != DR_ALIAS_UNSWAPPED)
1974 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1975 alias_pair->flags &= ~swap_mask;
1976 if (dump_enabled_p ())
1977 dump_alias_pair (alias_pair, " ");
1978 }
1979 }
1980
1981 /* A subroutine of create_intersect_range_checks, with a subset of the
1982 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1983 to optimize cases in which the references form a simple RAW, WAR or
1984 WAW dependence. */
1985
1986 static bool
1987 create_ifn_alias_checks (tree *cond_expr,
1988 const dr_with_seg_len_pair_t &alias_pair)
1989 {
1990 const dr_with_seg_len& dr_a = alias_pair.first;
1991 const dr_with_seg_len& dr_b = alias_pair.second;
1992
1993 /* Check for cases in which:
1994
1995 (a) we have a known RAW, WAR or WAW dependence;
1996 (b) the accesses are well-ordered in both the original and new code
1997 (see the comment above the DR_ALIAS_* flags for details); and
1998 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
1999 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2000 return false;
2001
2002 /* Make sure that both DRs access the same pattern of bytes,
2003 with a constant length and step. */
2004 poly_uint64 seg_len;
2005 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2006 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2007 || maybe_ne (dr_a.access_size, dr_b.access_size)
2008 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2009 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2010 return false;
2011
2012 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2013 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2014 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2015
2016 /* See whether the target supports what we want to do. WAW checks are
2017 equivalent to WAR checks here. */
2018 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2019 ? IFN_CHECK_RAW_PTRS
2020 : IFN_CHECK_WAR_PTRS);
2021 unsigned int align = MIN (dr_a.align, dr_b.align);
2022 poly_uint64 full_length = seg_len + bytes;
2023 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2024 full_length, align))
2025 {
2026 full_length = seg_len + dr_a.access_size;
2027 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2028 full_length, align))
2029 return false;
2030 }
2031
2032 /* Commit to using this form of test. */
2033 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2034 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2035
2036 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2037 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2038
2039 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2040 ifn, boolean_type_node,
2041 4, addr_a, addr_b,
2042 size_int (full_length),
2043 size_int (align));
2044
2045 if (dump_enabled_p ())
2046 {
2047 if (ifn == IFN_CHECK_RAW_PTRS)
2048 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2049 else
2050 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2051 }
2052 return true;
2053 }
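/* Schematically (the dump form below is an assumption, shown only for
   illustration), the resulting condition is a single internal call:

     _1 = .CHECK_RAW_PTRS (addr_a, addr_b, full_length, align);

   or its .CHECK_WAR_PTRS counterpart.  Targets with suitable support
   (e.g. AArch64 SVE2's WHILERW/WHILEWR instructions) can expand this to
   a dedicated pointer-check sequence instead of the generic address
   comparisons built by the fallback paths below.  */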
2054
2055 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2056 free of aliases, using a condition based on index values instead
2057 of a condition based on addresses. Return true on success,
2058 storing the condition in *COND_EXPR.
2059
2060 This can only be done if the two data references in ALIAS_PAIR access
2061 the same array object and the index is the only difference. For example,
2062 if the two data references are DR_A and DR_B:
2063
2064 DR_A DR_B
2065 data-ref arr[i] arr[j]
2066 base_object arr arr
2067 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2068
2069 The addresses and their indices are laid out like:
2070
2071 |<- ADDR_A ->| |<- ADDR_B ->|
2072 ------------------------------------------------------->
2073 | | | | | | | | | |
2074 ------------------------------------------------------->
2075 i_0 ... i_0+4 j_0 ... j_0+4
2076
2077 We can create an expression based on the indices rather than the addresses:
2078
2079 (unsigned) (i_0 - j_0 + 3) <= 6
2080
2081 i.e. the indices are less than 4 apart.
2082
2083 Note that the evolution step of the index needs to be taken into account in the comparison. */
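/* To make the example above executable (an illustrative sketch only):
   with segment length 4 and unit-sized accesses, independence holds
   exactly when the biased difference wraps past the limit:

     static int
     indices_independent (long i_0, long j_0)
     {
       return (unsigned long) (i_0 - j_0 + 3) > 6;
     }

   e.g. i_0 = 0, j_0 = 10 gives (unsigned long) -7, which is greater
   than 6, so the index ranges [0, 3] and [10, 13] cannot overlap.  */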
2084
2085 static bool
2086 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2087 const dr_with_seg_len_pair_t &alias_pair)
2088 {
2089 const dr_with_seg_len &dr_a = alias_pair.first;
2090 const dr_with_seg_len &dr_b = alias_pair.second;
2091 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2092 || integer_zerop (DR_STEP (dr_a.dr))
2093 || integer_zerop (DR_STEP (dr_b.dr))
2094 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2095 return false;
2096
2097 poly_uint64 seg_len1, seg_len2;
2098 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2099 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2100 return false;
2101
2102 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2103 return false;
2104
2105 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2106 return false;
2107
2108 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2109 return false;
2110
2111 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2112
2113 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2114 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2115 if (neg_step)
2116 {
2117 abs_step = -abs_step;
2118 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2119 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2120 }
2121
2122 /* Infer the number of iterations with which the memory segment is accessed
2123 by each DR. In other words, aliasing is checked if the memory segment
2124 accessed by DR_A in some iterations intersects with the memory segment
2125 accessed by DR_B in the same number of iterations.
2126 Note that the segment length is a linear function of the number of
2127 iterations with DR_STEP as the coefficient. */
2128 poly_uint64 niter_len1, niter_len2;
2129 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2130 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2131 return false;
2132
2133 /* Divide each access size by the byte step, rounding up. */
2134 poly_uint64 niter_access1, niter_access2;
2135 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2136 abs_step, &niter_access1)
2137 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2138 abs_step, &niter_access2))
2139 return false;
2140
2141 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2142
2143 int found = -1;
2144 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2145 {
2146 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2147 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2148 /* The two indices must be the same if they are not SCEVs, or are not
2149 SCEVs wrt the current loop being vectorized. */
2150 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2151 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2152 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2153 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2154 {
2155 if (operand_equal_p (access1, access2, 0))
2156 continue;
2157
2158 return false;
2159 }
2160 if (found >= 0)
2161 return false;
2162 found = i;
2163 }
2164
2165 /* Ought not to happen in practice, since if all accesses are equal then the
2166 alias should be decidable at compile time. */
2167 if (found < 0)
2168 return false;
2169
2170 /* The two indices must have the same step. */
2171 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2172 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2173 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2174 return false;
2175
2176 tree idx_step = CHREC_RIGHT (access1);
2177 /* Index must have const step, otherwise DR_STEP won't be constant. */
2178 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2179 /* Index must evaluate in the same direction as DR. */
2180 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2181
2182 tree min1 = CHREC_LEFT (access1);
2183 tree min2 = CHREC_LEFT (access2);
2184 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2185 return false;
2186
2187 /* Ideally, aliasing could be checked against the loop's control IV, but
2188 we would need to prove a linear mapping between the control IV and the
2189 reference index. Although that should be true, we instead check against
2190 the (array) index of the data reference. Like the segment length, the
2191 index length is a linear function of the number of iterations with
2192 idx_step as the coefficient, i.e., niter_len * idx_step. */
2193 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2194 SIGNED);
2195 if (neg_step)
2196 abs_idx_step = -abs_idx_step;
2197 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2198 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2199 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2200 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2201
2202 gcc_assert (known_ge (idx_len1, 0)
2203 && known_ge (idx_len2, 0)
2204 && known_ge (idx_access1, 0)
2205 && known_ge (idx_access2, 0));
2206
2207 /* Each access has the following pattern, with lengths measured
2208 in units of INDEX:
2209
2210 <-- idx_len -->
2211 <--- A: -ve step --->
2212 +-----+-------+-----+-------+-----+
2213 | n-1 | ..... | 0 | ..... | n-1 |
2214 +-----+-------+-----+-------+-----+
2215 <--- B: +ve step --->
2216 <-- idx_len -->
2217 |
2218 min
2219
2220 where "n" is the number of scalar iterations covered by the segment
2221 and where each access spans idx_access units.
2222
2223 A is the range of bytes accessed when the step is negative,
2224 B is the range when the step is positive.
2225
2226 When checking for general overlap, we need to test whether
2227 the range:
2228
2229 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2230
2231 overlaps:
2232
2233 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2234
2235 where:
2236
2237 low_offsetN = +ve step ? 0 : -idx_lenN;
2238 high_offsetN = +ve step ? idx_lenN : 0;
2239
2240 This is equivalent to testing whether:
2241
2242 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2243 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2244
2245 Converting this into a single test, there is an overlap if:
2246
2247 0 <= min2 - min1 + bias <= limit
2248
2249 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2250 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2251 + (high_offset2 - low_offset2 + idx_access2 - 1)
2252 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2253
2254 Combining the tests requires limit to be computable in an unsigned
2255 form of the index type; if it isn't, we fall back to the usual
2256 pointer-based checks.
2257
2258 We can do better if DR_B is a write and if DR_A and DR_B are
2259 well-ordered in both the original and the new code (see the
2260 comment above the DR_ALIAS_* flags for details). In this case
2261 we know that for each i in [0, n-1], the write performed by
2262 access i of DR_B occurs after access numbers j<=i of DR_A in
2263 both the original and the new code. Any write or anti
2264 dependencies wrt those DR_A accesses are therefore maintained.
2265
2266 We just need to make sure that each individual write in DR_B does not
2267 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2268 after the DR_B access in the original code but happen before it in
2269 the new code.
2270
2271 We know the steps for both accesses are equal, so by induction, we
2272 just need to test whether the first write of DR_B overlaps a later
2273 access of DR_A. In other words, we need to move min1 along by
2274 one iteration:
2275
2276 min1' = min1 + idx_step
2277
2278 and use the ranges:
2279
2280 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2281
2282 and:
2283
2284 [min2, min2 + idx_access2 - 1]
2285
2286 where:
2287
2288 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2289 high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
2290 if (waw_or_war_p)
2291 idx_len1 -= abs_idx_step;
2292
2293 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2294 if (!waw_or_war_p)
2295 limit += idx_len2;
2296
2297 tree utype = unsigned_type_for (TREE_TYPE (min1));
2298 if (!wi::fits_to_tree_p (limit, utype))
2299 return false;
2300
2301 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2302 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2303 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2304 /* Equivalent to adding IDX_STEP to MIN1. */
2305 if (waw_or_war_p)
2306 bias -= wi::to_offset (idx_step);
2307
2308 tree subject = fold_build2 (MINUS_EXPR, utype,
2309 fold_convert (utype, min2),
2310 fold_convert (utype, min1));
2311 subject = fold_build2 (PLUS_EXPR, utype, subject,
2312 wide_int_to_tree (utype, bias));
2313 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2314 wide_int_to_tree (utype, limit));
2315 if (*cond_expr)
2316 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2317 *cond_expr, part_cond_expr);
2318 else
2319 *cond_expr = part_cond_expr;
2320 if (dump_enabled_p ())
2321 {
2322 if (waw_or_war_p)
2323 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2324 else
2325 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2326 }
2327 return true;
2328 }
2329
2330 /* A subroutine of create_intersect_range_checks, with a subset of the
2331 same arguments. Try to optimize cases in which the second access
2332 is a write and in which some overlap is valid. */
2333
2334 static bool
2335 create_waw_or_war_checks (tree *cond_expr,
2336 const dr_with_seg_len_pair_t &alias_pair)
2337 {
2338 const dr_with_seg_len& dr_a = alias_pair.first;
2339 const dr_with_seg_len& dr_b = alias_pair.second;
2340
2341 /* Check for cases in which:
2342
2343 (a) DR_B is always a write;
2344 (b) the accesses are well-ordered in both the original and new code
2345 (see the comment above the DR_ALIAS_* flags for details); and
2346 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2347 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2348 return false;
2349
2350 /* Check for equal (but possibly variable) steps. */
2351 tree step = DR_STEP (dr_a.dr);
2352 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2353 return false;
2354
2355 /* Make sure that we can operate on sizetype without loss of precision. */
2356 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2357 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2358 return false;
2359
2360 /* All addresses involved are known to have a common alignment ALIGN.
2361 We can therefore subtract ALIGN from an exclusive endpoint to get
2362 an inclusive endpoint. In the best (and common) case, ALIGN is the
2363 same as the access sizes of both DRs, and so subtracting ALIGN
2364 cancels out the addition of an access size. */
2365 unsigned int align = MIN (dr_a.align, dr_b.align);
2366 poly_uint64 last_chunk_a = dr_a.access_size - align;
2367 poly_uint64 last_chunk_b = dr_b.access_size - align;
2368
2369 /* Get a boolean expression that is true when the step is negative. */
2370 tree indicator = dr_direction_indicator (dr_a.dr);
2371 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2372 fold_convert (ssizetype, indicator),
2373 ssize_int (0));
2374
2375 /* Get lengths in sizetype. */
2376 tree seg_len_a
2377 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2378 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2379
2380 /* Each access has the following pattern:
2381
2382 <- |seg_len| ->
2383 <--- A: -ve step --->
2384 +-----+-------+-----+-------+-----+
2385 | n-1 | ..... | 0 | ..... | n-1 |
2386 +-----+-------+-----+-------+-----+
2387 <--- B: +ve step --->
2388 <- |seg_len| ->
2389 |
2390 base address
2391
2392 where "n" is the number of scalar iterations covered by the segment.
2393
2394 A is the range of bytes accessed when the step is negative,
2395 B is the range when the step is positive.
2396
2397 We know that DR_B is a write. We also know (from checking that
2398 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2399 the write performed by access i of DR_B occurs after access numbers
2400 j<=i of DR_A in both the original and the new code. Any write or
2401 anti dependencies wrt those DR_A accesses are therefore maintained.
2402
2403 We just need to make sure that each individual write in DR_B does not
2404 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2405 after the DR_B access in the original code but happen before it in
2406 the new code.
2407
2408 We know the steps for both accesses are equal, so by induction, we
2409 just need to test whether the first write of DR_B overlaps a later
2410 access of DR_A. In other words, we need to move addr_a along by
2411 one iteration:
2412
2413 addr_a' = addr_a + step
2414
2415 and check whether:
2416
2417 [addr_b, addr_b + last_chunk_b]
2418
2419 overlaps:
2420
2421 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2422
2423 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2424
2425 low_offset_a = +ve step ? 0 : seg_len_a - step
2426 high_offset_a = +ve step ? seg_len_a - step : 0
2427
2428 This is equivalent to testing whether:
2429
2430 addr_a' + low_offset_a <= addr_b + last_chunk_b
2431 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2432
2433 Converting this into a single test, there is an overlap if:
2434
2435 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2436
2437 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2438
2439 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2440 less than the size of the object underlying DR_A. We also know
2441 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2442 guaranteed at compile time. There can therefore be no overflow if
2443 "limit" is calculated in an unsigned type with pointer precision. */
2444 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2445 DR_OFFSET (dr_a.dr));
2446 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2447
2448 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2449 DR_OFFSET (dr_b.dr));
2450 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2451
2452 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2453 addr_a = fold_build_pointer_plus (addr_a, step);
2454 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2455 seg_len_a, step);
2456 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2457 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2458
2459 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2460 seg_len_a_minus_step, size_zero_node);
2461 if (!CONSTANT_CLASS_P (low_offset_a))
2462 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2463
2464 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2465 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2466 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2467 low_offset_a);
2468
2469 /* The amount added to addr_b - addr_a'. */
2470 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2471 size_int (last_chunk_b), low_offset_a);
2472
2473 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2474 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2475 size_int (last_chunk_a + last_chunk_b));
2476
2477 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2478 subject = fold_build2 (PLUS_EXPR, sizetype,
2479 fold_convert (sizetype, subject), bias);
2480
2481 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2482 if (dump_enabled_p ())
2483 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2484 return true;
2485 }
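/* A scalar model of the test above for the positive-step case (a hedged
   sketch with illustrative parameter names, not the trees actually
   built):

     static int
     waw_war_no_alias (char *addr_a, char *addr_b, unsigned long step,
                       unsigned long seg_len_a, unsigned long last_chunk_a,
                       unsigned long last_chunk_b)
     {
       char *addr_a1 = addr_a + step;
       unsigned long high_offset_a = seg_len_a - step;
       unsigned long limit = high_offset_a + last_chunk_a + last_chunk_b;
       unsigned long subject = (unsigned long) (addr_b - addr_a1)
                               + last_chunk_b;
       return subject > limit;
     }

   The single unsigned comparison folds the two range conditions into
   one: any underflow in SUBJECT wraps to a large value above LIMIT.  */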
2486
2487 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2488 every address ADDR accessed by D:
2489
2490 *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2491
2492 In this case, every element accessed by D is aligned to at least
2493 ALIGN bytes.
2494
2495 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2496
2497 *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2498
2499 static void
2500 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2501 tree *seg_max_out, HOST_WIDE_INT align)
2502 {
2503 /* Each access has the following pattern:
2504
2505 <- |seg_len| ->
2506 <--- A: -ve step --->
2507 +-----+-------+-----+-------+-----+
2508 | n-1 | ..... | 0 | ..... | n-1 |
2509 +-----+-------+-----+-------+-----+
2510 <--- B: +ve step --->
2511 <- |seg_len| ->
2512 |
2513 base address
2514
2515 where "n" is the number of scalar iterations covered by the segment.
2516 (This should be VF for a particular pair if we know that both steps
2517 are the same, otherwise it will be the full number of scalar loop
2518 iterations.)
2519
2520 A is the range of bytes accessed when the step is negative,
2521 B is the range when the step is positive.
2522
2523 If the access size is "access_size" bytes, the lowest addressed byte is:
2524
2525 base + (step < 0 ? seg_len : 0) [LB]
2526
2527 and the highest addressed byte is always below:
2528
2529 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2530
2531 Thus:
2532
2533 LB <= ADDR < UB
2534
2535 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2536 bytes, so:
2537
2538 LB <= ADDR <= UB - ALIGN
2539
2540 where "- ALIGN" folds naturally with the "+ access_size" and often
2541 cancels it out.
2542
2543 We don't try to simplify LB and UB beyond this (e.g. by using
2544 MIN and MAX based on whether seg_len rather than the stride is
2545 negative) because it is possible for the absolute size of the
2546 segment to overflow the range of a ssize_t.
2547
2548 Keeping the pointer_plus outside of the cond_expr should allow
2549 the cond_exprs to be shared with other alias checks. */
2550 tree indicator = dr_direction_indicator (d.dr);
2551 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2552 fold_convert (ssizetype, indicator),
2553 ssize_int (0));
2554 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2555 DR_OFFSET (d.dr));
2556 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2557 tree seg_len
2558 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2559
2560 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561 seg_len, size_zero_node);
2562 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2563 size_zero_node, seg_len);
2564 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2565 size_int (d.access_size - align));
2566
2567 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2568 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2569 }
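/* A scalar model of the bounds computed above (an illustrative sketch):
   for a segment starting at BASE (with DR_OFFSET and DR_INIT already
   added in), the two reaches are:

     static void
     segment_bounds (char *base, long seg_len, int neg_step,
                     unsigned long access_size, unsigned long align,
                     char **seg_min_out, char **seg_max_out)
     {
       *seg_min_out = base + (neg_step ? seg_len : 0);
       *seg_max_out = base + (neg_step ? 0 : seg_len) + access_size - align;
     }

   With ALIGN == 0 the maximum is exclusive; with a nonzero ALIGN it is
   inclusive, matching [LB] and [UB] in the comment above.  */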
2570
2571 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2572 storing the condition in *COND_EXPR. The fallback is to generate a
2573 test that the two accesses do not overlap:
2574
2575 end_a <= start_b || end_b <= start_a. */
2576
2577 static void
2578 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2579 const dr_with_seg_len_pair_t &alias_pair)
2580 {
2581 const dr_with_seg_len& dr_a = alias_pair.first;
2582 const dr_with_seg_len& dr_b = alias_pair.second;
2583 *cond_expr = NULL_TREE;
2584 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2585 return;
2586
2587 if (create_ifn_alias_checks (cond_expr, alias_pair))
2588 return;
2589
2590 if (create_waw_or_war_checks (cond_expr, alias_pair))
2591 return;
2592
2593 unsigned HOST_WIDE_INT min_align;
2594 tree_code cmp_code;
2595 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2596 are equivalent. This is just an optimization heuristic. */
2597 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2598 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2599 {
2600 /* In this case adding access_size to seg_len is likely to give
2601 a simple X * step, where X is either the number of scalar
2602 iterations or the vectorization factor. We're better off
2603 keeping that, rather than subtracting an alignment from it.
2604
2605 In this case the maximum values are exclusive and so there is
2606 no alias if the maximum of one segment equals the minimum
2607 of another. */
2608 min_align = 0;
2609 cmp_code = LE_EXPR;
2610 }
2611 else
2612 {
2613 /* Calculate the minimum alignment shared by all four pointers,
2614 then arrange for this alignment to be subtracted from the
2615 exclusive maximum values to get inclusive maximum values.
2616 This "- min_align" is cumulative with a "+ access_size"
2617 in the calculation of the maximum values. In the best
2618 (and common) case, the two cancel each other out, leaving
2619 us with an inclusive bound based only on seg_len. In the
2620 worst case we're simply adding a smaller number than before.
2621
2622 Because the maximum values are inclusive, there is an alias
2623 if the maximum value of one segment is equal to the minimum
2624 value of the other. */
2625 min_align = MIN (dr_a.align, dr_b.align);
2626 cmp_code = LT_EXPR;
2627 }
2628
2629 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2630 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2631 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2632
2633 *cond_expr
2634 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2635 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2636 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2637 if (dump_enabled_p ())
2638 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2639 }
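/* Putting the pieces together (a hedged scalar sketch of the fallback
   test, not the folded trees themselves): with exclusive maxima
   (MIN_ALIGN == 0) the generated condition behaves like:

     static int
     segments_no_alias (char *a_min, char *a_max, char *b_min, char *b_max)
     {
       return a_max <= b_min || b_max <= a_min;
     }

   and with inclusive maxima the <= comparisons become <, matching the
   CMP_CODE choice above.  */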
2640
2641 /* Create a conditional expression that represents the run-time checks for
2642 overlapping of address ranges represented by a list of data reference
2643 pairs passed in ALIAS_PAIRS. The data references are in LOOP. The returned
2644 COND_EXPR is the conditional expression to be used in the if statement
2645 that controls which version of the loop gets executed at runtime. */
2646
2647 void
2648 create_runtime_alias_checks (class loop *loop,
2649 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2650 tree * cond_expr)
2651 {
2652 tree part_cond_expr;
2653
2654 fold_defer_overflow_warnings ();
2655 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2656 {
2657 gcc_assert (alias_pair.flags);
2658 if (dump_enabled_p ())
2659 dump_printf (MSG_NOTE,
2660 "create runtime check for data references %T and %T\n",
2661 DR_REF (alias_pair.first.dr),
2662 DR_REF (alias_pair.second.dr));
2663
2664 /* Create a condition expression for each pair of data references. */
2665 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2666 if (*cond_expr)
2667 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2668 *cond_expr, part_cond_expr);
2669 else
2670 *cond_expr = part_cond_expr;
2671 }
2672 fold_undefer_and_ignore_overflow_warnings ();
2673 }
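/* The caller typically uses the returned *COND_EXPR to version the
   loop.  Schematically, at the source level (the loop body here is
   hypothetical):

     static void
     add_versioned (int *dst, int *src, int n)
     {
       if (dst + n <= src || src + n <= dst)
         for (int i = 0; i < n; ++i)
           dst[i] = src[i] + 1;
       else
         for (int i = 0; i < n; ++i)
           dst[i] = src[i] + 1;
     }

   where the first copy of the loop can be vectorized under the proven
   no-alias assumption and the second copy remains the scalar
   fallback.  */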
2674
2675 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2676 expressions. */
2677 static bool
2678 dr_equal_offsets_p1 (tree offset1, tree offset2)
2679 {
2680 bool res;
2681
2682 STRIP_NOPS (offset1);
2683 STRIP_NOPS (offset2);
2684
2685 if (offset1 == offset2)
2686 return true;
2687
2688 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2689 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2690 return false;
2691
2692 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2693 TREE_OPERAND (offset2, 0));
2694
2695 if (!res || !BINARY_CLASS_P (offset1))
2696 return res;
2697
2698 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2699 TREE_OPERAND (offset2, 1));
2700
2701 return res;
2702 }
2703
2704 /* Check if DRA and DRB have equal offsets. */
2705 bool
2706 dr_equal_offsets_p (struct data_reference *dra,
2707 struct data_reference *drb)
2708 {
2709 tree offset1, offset2;
2710
2711 offset1 = DR_OFFSET (dra);
2712 offset2 = DR_OFFSET (drb);
2713
2714 return dr_equal_offsets_p1 (offset1, offset2);
2715 }
2716
2717 /* Returns true if FNA == FNB. */
2718
2719 static bool
2720 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2721 {
2722 unsigned i, n = fna.length ();
2723
2724 if (n != fnb.length ())
2725 return false;
2726
2727 for (i = 0; i < n; i++)
2728 if (!operand_equal_p (fna[i], fnb[i], 0))
2729 return false;
2730
2731 return true;
2732 }
2733
2734 /* If all the functions in CF are the same, returns one of them,
2735 otherwise returns an empty affine_fn. */
2736
2737 static affine_fn
2738 common_affine_function (conflict_function *cf)
2739 {
2740 unsigned i;
2741 affine_fn comm;
2742
2743 if (!CF_NONTRIVIAL_P (cf))
2744 return affine_fn ();
2745
2746 comm = cf->fns[0];
2747
2748 for (i = 1; i < cf->n; i++)
2749 if (!affine_function_equal_p (comm, cf->fns[i]))
2750 return affine_fn ();
2751
2752 return comm;
2753 }
2754
2755 /* Returns the base of the affine function FN. */
2756
2757 static tree
2758 affine_function_base (affine_fn fn)
2759 {
2760 return fn[0];
2761 }
2762
2763 /* Returns true if FN is a constant. */
2764
2765 static bool
2766 affine_function_constant_p (affine_fn fn)
2767 {
2768 unsigned i;
2769 tree coef;
2770
2771 for (i = 1; fn.iterate (i, &coef); i++)
2772 if (!integer_zerop (coef))
2773 return false;
2774
2775 return true;
2776 }
2777
2778 /* Returns true if FN is the zero constant function. */
2779
2780 static bool
2781 affine_function_zero_p (affine_fn fn)
2782 {
2783 return (integer_zerop (affine_function_base (fn))
2784 && affine_function_constant_p (fn));
2785 }
2786
2787 /* Returns a signed integer type with the largest precision from TA
2788 and TB. */
2789
2790 static tree
2791 signed_type_for_types (tree ta, tree tb)
2792 {
2793 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2794 return signed_type_for (ta);
2795 else
2796 return signed_type_for (tb);
2797 }
2798
2799 /* Applies operation OP on affine functions FNA and FNB, and returns the
2800 result. */
2801
2802 static affine_fn
2803 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2804 {
2805 unsigned i, n, m;
2806 affine_fn ret;
2807 tree coef;
2808
2809 if (fnb.length () > fna.length ())
2810 {
2811 n = fna.length ();
2812 m = fnb.length ();
2813 }
2814 else
2815 {
2816 n = fnb.length ();
2817 m = fna.length ();
2818 }
2819
2820 ret.create (m);
2821 for (i = 0; i < n; i++)
2822 {
2823 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2824 TREE_TYPE (fnb[i]));
2825 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2826 }
2827
2828 for (; fna.iterate (i, &coef); i++)
2829 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2830 coef, integer_zero_node));
2831 for (; fnb.iterate (i, &coef); i++)
2832 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2833 integer_zero_node, coef));
2834
2835 return ret;
2836 }
2837
2838 /* Returns the sum of affine functions FNA and FNB. */
2839
2840 static affine_fn
2841 affine_fn_plus (affine_fn fna, affine_fn fnb)
2842 {
2843 return affine_fn_op (PLUS_EXPR, fna, fnb);
2844 }
2845
2846 /* Returns the difference of affine functions FNA and FNB. */
2847
2848 static affine_fn
2849 affine_fn_minus (affine_fn fna, affine_fn fnb)
2850 {
2851 return affine_fn_op (MINUS_EXPR, fna, fnb);
2852 }
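/* For illustration (the values are hypothetical): an affine_fn is a
   coefficient vector [base, c1, c2, ...] denoting the function
   base + c1 * x1 + c2 * x2 + ....  With fna = [1, 2] (i.e. 1 + 2*x)
   and fnb = [3, 4] (i.e. 3 + 4*x):

     affine_fn_plus (fna, fnb)   yields [4, 6]   (4 + 6*x)
     affine_fn_minus (fna, fnb)  yields [-2, -2] (-2 - 2*x)

   with each element folded in a signed type wide enough for both
   operands.  */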
2853
2854 /* Frees affine function FN. */
2855
2856 static void
2857 affine_fn_free (affine_fn fn)
2858 {
2859 fn.release ();
2860 }
2861
2862 /* For each subscript in the data dependence relation DDR, determine
2863 the distance. */
2864
2865 static void
2866 compute_subscript_distance (struct data_dependence_relation *ddr)
2867 {
2868 conflict_function *cf_a, *cf_b;
2869 affine_fn fn_a, fn_b, diff;
2870
2871 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2872 {
2873 unsigned int i;
2874
2875 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2876 {
2877 struct subscript *subscript;
2878
2879 subscript = DDR_SUBSCRIPT (ddr, i);
2880 cf_a = SUB_CONFLICTS_IN_A (subscript);
2881 cf_b = SUB_CONFLICTS_IN_B (subscript);
2882
2883 fn_a = common_affine_function (cf_a);
2884 fn_b = common_affine_function (cf_b);
2885 if (!fn_a.exists () || !fn_b.exists ())
2886 {
2887 SUB_DISTANCE (subscript) = chrec_dont_know;
2888 return;
2889 }
2890 diff = affine_fn_minus (fn_a, fn_b);
2891
2892 if (affine_function_constant_p (diff))
2893 SUB_DISTANCE (subscript) = affine_function_base (diff);
2894 else
2895 SUB_DISTANCE (subscript) = chrec_dont_know;
2896
2897 affine_fn_free (diff);
2898 }
2899 }
2900 }
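/* Example (a hedged sketch): for an access pair like A[i] and A[i + 2]
   in the same loop, the conflict functions reduce to affine functions
   whose difference is constant, so SUB_DISTANCE becomes that constant
   (the dependence distance, here 2 up to sign).  For a pair like A[i]
   and A[2*i] the difference is not constant and SUB_DISTANCE is
   chrec_dont_know.  */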
2901
2902 /* Returns the conflict function for "unknown". */
2903
2904 static conflict_function *
2905 conflict_fn_not_known (void)
2906 {
2907 conflict_function *fn = XCNEW (conflict_function);
2908 fn->n = NOT_KNOWN;
2909
2910 return fn;
2911 }
2912
2913 /* Returns the conflict function for "independent". */
2914
2915 static conflict_function *
2916 conflict_fn_no_dependence (void)
2917 {
2918 conflict_function *fn = XCNEW (conflict_function);
2919 fn->n = NO_DEPENDENCE;
2920
2921 return fn;
2922 }
2923
2924 /* Returns true if the address of OBJ is invariant in LOOP. */
2925
2926 static bool
2927 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2928 {
2929 while (handled_component_p (obj))
2930 {
2931 if (TREE_CODE (obj) == ARRAY_REF)
2932 {
2933 for (int i = 1; i < 4; ++i)
2934 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2935 loop->num))
2936 return false;
2937 }
2938 else if (TREE_CODE (obj) == COMPONENT_REF)
2939 {
2940 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2941 loop->num))
2942 return false;
2943 }
2944 obj = TREE_OPERAND (obj, 0);
2945 }
2946
2947 if (!INDIRECT_REF_P (obj)
2948 && TREE_CODE (obj) != MEM_REF)
2949 return true;
2950
2951 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2952 loop->num);
2953 }
2954
2955 /* Returns false if we can prove that data references A and B do not alias,
2956 true otherwise. If LOOP_NEST is NULL, no cross-iteration aliases are
2957 considered. */
2958
2959 bool
2960 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2961 class loop *loop_nest)
2962 {
2963 tree addr_a = DR_BASE_OBJECT (a);
2964 tree addr_b = DR_BASE_OBJECT (b);
2965
2966 /* If we are not processing a loop nest but scalar code we
2967 do not need to care about possible cross-iteration dependences
2968 and thus can process the full original reference. Do so,
2969 similar to how loop invariant motion applies extra offset-based
2970 disambiguation. */
2971 if (!loop_nest)
2972 {
2973 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2974 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2975
2976 if (DR_BASE_ADDRESS (a)
2977 && DR_BASE_ADDRESS (b)
2978 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2979 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2980 && poly_int_tree_p (tree_size_a)
2981 && poly_int_tree_p (tree_size_b)
2982 && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2983 wi::to_poly_widest (tree_size_a),
2984 wi::to_poly_widest (DR_INIT (b)),
2985 wi::to_poly_widest (tree_size_b)))
2986 {
2987 gcc_assert (integer_zerop (DR_STEP (a))
2988 && integer_zerop (DR_STEP (b)));
2989 return false;
2990 }
2991
2992 aff_tree off1, off2;
2993 poly_widest_int size1, size2;
2994 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2995 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2996 aff_combination_scale (&off1, -1);
2997 aff_combination_add (&off2, &off1);
2998 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2999 return false;
3000 }
3001
3002 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3003 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3004 /* For cross-iteration dependences the cliques must be valid for the
3005 whole loop, not just individual iterations. */
3006 && (!loop_nest
3007 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3008 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3009 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3010 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3011 return false;
3012
3013 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3014 do not know the size of the base-object. So we cannot do any
3015 offset/overlap based analysis but have to rely on points-to
3016 information only. */
3017 if (TREE_CODE (addr_a) == MEM_REF
3018 && (DR_UNCONSTRAINED_BASE (a)
3019 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3020 {
3021 /* For true dependences we can apply TBAA. */
3022 if (flag_strict_aliasing
3023 && DR_IS_WRITE (a) && DR_IS_READ (b)
3024 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3025 get_alias_set (DR_REF (b))))
3026 return false;
3027 if (TREE_CODE (addr_b) == MEM_REF)
3028 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3029 TREE_OPERAND (addr_b, 0));
3030 else
3031 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3032 build_fold_addr_expr (addr_b));
3033 }
3034 else if (TREE_CODE (addr_b) == MEM_REF
3035 && (DR_UNCONSTRAINED_BASE (b)
3036 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3037 {
3038 /* For true dependences we can apply TBAA. */
3039 if (flag_strict_aliasing
3040 && DR_IS_WRITE (a) && DR_IS_READ (b)
3041 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3042 get_alias_set (DR_REF (b))))
3043 return false;
3044 if (TREE_CODE (addr_a) == MEM_REF)
3045 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3046 TREE_OPERAND (addr_b, 0));
3047 else
3048 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3049 TREE_OPERAND (addr_b, 0));
3050 }
3051
3052 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3053 that is being subsetted in the loop nest. */
3054 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3055 return refs_output_dependent_p (addr_a, addr_b);
3056 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3057 return refs_anti_dependent_p (addr_a, addr_b);
3058 return refs_may_alias_p (addr_a, addr_b);
3059 }
3060
3061 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3062 if it is meaningful to compare their associated access functions
3063 when checking for dependencies. */
3064
3065 static bool
3066 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3067 {
3068 /* Allow pairs of component refs from the following sets:
3069
3070 { REALPART_EXPR, IMAGPART_EXPR }
3071 { COMPONENT_REF }
3072 { ARRAY_REF }. */
3073 tree_code code_a = TREE_CODE (ref_a);
3074 tree_code code_b = TREE_CODE (ref_b);
3075 if (code_a == IMAGPART_EXPR)
3076 code_a = REALPART_EXPR;
3077 if (code_b == IMAGPART_EXPR)
3078 code_b = REALPART_EXPR;
3079 if (code_a != code_b)
3080 return false;
3081
3082 if (TREE_CODE (ref_a) == COMPONENT_REF)
3083 /* ??? We cannot simply use the type of operand #0 of the refs here as
3084 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3085 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3086 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3087 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3088
3089 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3090 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3091 }
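/* For instance, __real x.f and __imag x.f are comparable (IMAGPART_EXPR
   is canonicalized to REALPART_EXPR above), two COMPONENT_REFs are
   comparable if their FIELD_DECLs belong to the same structure, and a
   COMPONENT_REF is never comparable with an ARRAY_REF.  */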
3092
3093 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3094 is true when the main indices of A and B were not comparable, so we try again
3095 with alternate indices computed on an indirect reference. */
3096
3097 struct data_dependence_relation *
3098 initialize_data_dependence_relation (struct data_dependence_relation *res,
3099 vec<loop_p> loop_nest,
3100 bool use_alt_indices)
3101 {
3102 struct data_reference *a = DDR_A (res);
3103 struct data_reference *b = DDR_B (res);
3104 unsigned int i;
3105
3106 struct indices *indices_a = &a->indices;
3107 struct indices *indices_b = &b->indices;
3108 if (use_alt_indices)
3109 {
3110 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3111 indices_a = &a->alt_indices;
3112 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3113 indices_b = &b->alt_indices;
3114 }
3115 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3116 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3117 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3118 {
3119 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3120 return res;
3121 }
3122
3123 /* For unconstrained bases, the root (highest-indexed) subscript
3124 describes a variation in the base of the original DR_REF rather
3125 than a component access. We have no type that accurately describes
3126 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3127 applying this subscript) so limit the search to the last real
3128 component access.
3129
3130 E.g. for:
3131
3132 void
3133 f (int a[][8], int b[][8])
3134 {
3135 for (int i = 0; i < 8; ++i)
3136 a[i * 2][0] = b[i][0];
3137 }
3138
3139 the a and b accesses have a single ARRAY_REF component reference [0]
3140 but have two subscripts. */
3141 if (indices_a->unconstrained_base)
3142 num_dimensions_a -= 1;
3143 if (indices_b->unconstrained_base)
3144 num_dimensions_b -= 1;
3145
3146 /* These structures describe sequences of component references in
3147 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3148 specific access function. */
3149 struct {
3150 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3151 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3152 indices. In C notation, these are the indices of the rightmost
3153 component references; e.g. for a sequence .b.c.d, the start
3154 index is for .d. */
3155 unsigned int start_a;
3156 unsigned int start_b;
3157
3158 /* The sequence contains LENGTH consecutive access functions from
3159 each DR. */
3160 unsigned int length;
3161
3162 /* The enclosing objects for the A and B sequences respectively,
3163 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3164 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3165 tree object_a;
3166 tree object_b;
3167 } full_seq = {}, struct_seq = {};
3168
3169 /* Before each iteration of the loop:
3170
3171 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3172 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3173 unsigned int index_a = 0;
3174 unsigned int index_b = 0;
3175 tree ref_a = DR_REF (a);
3176 tree ref_b = DR_REF (b);
3177
3178 /* Now walk the component references from the final DR_REFs back up to
3179 the enclosing base objects. Each component reference corresponds
3180 to one access function in the DR, with access function 0 being for
3181 the final DR_REF and the highest-indexed access function being the
3182 one that is applied to the base of the DR.
3183
3184 Look for a sequence of component references whose access functions
3185 are comparable (see access_fn_components_comparable_p). If more
3186 than one such sequence exists, pick the one nearest the base
3187 (which is the leftmost sequence in C notation). Store this sequence
3188 in FULL_SEQ.
3189
3190 For example, if we have:
3191
3192 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3193
3194 A: a[0][i].s.c.d
3195 B: __real b[0][i].s.e[i].f
3196
3197 (where d is the same type as the real component of f) then the access
3198 functions would be:
3199
3200 0 1 2 3
3201 A: .d .c .s [i]
3202
3203 0 1 2 3 4 5
3204 B: __real .f [i] .e .s [i]
3205
3206 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3207 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3208 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3209 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3210 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3211 index foo[10] arrays, so is again comparable. The sequence is
3212 therefore:
3213
3214 A: [1, 3] (i.e. [i].s.c)
3215 B: [3, 5] (i.e. [i].s.e)
3216
3217 Also look for sequences of component references whose access
3218 functions are comparable and whose enclosing objects have the same
3219 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3220 example, STRUCT_SEQ would be:
3221
3222 A: [1, 2] (i.e. s.c)
3223 B: [3, 4] (i.e. s.e) */
3224 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3225 {
3226 /* The alternate indices form always has a single dimension
3227 with unconstrained base. */
3228 gcc_assert (!use_alt_indices);
3229
3230 /* REF_A and REF_B must be one of the component access types
3231 allowed by dr_analyze_indices. */
3232 gcc_checking_assert (access_fn_component_p (ref_a));
3233 gcc_checking_assert (access_fn_component_p (ref_b));
3234
3235 /* Get the immediately-enclosing objects for REF_A and REF_B,
3236 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3237 and DR_ACCESS_FN (B, INDEX_B). */
3238 tree object_a = TREE_OPERAND (ref_a, 0);
3239 tree object_b = TREE_OPERAND (ref_b, 0);
3240
3241 tree type_a = TREE_TYPE (object_a);
3242 tree type_b = TREE_TYPE (object_b);
3243 if (access_fn_components_comparable_p (ref_a, ref_b))
3244 {
3245 /* This pair of component accesses is comparable for dependence
3246 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3247 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3248 if (full_seq.start_a + full_seq.length != index_a
3249 || full_seq.start_b + full_seq.length != index_b)
3250 {
3251 /* The accesses don't extend the current sequence,
3252 so start a new one here. */
3253 full_seq.start_a = index_a;
3254 full_seq.start_b = index_b;
3255 full_seq.length = 0;
3256 }
3257
3258 /* Add this pair of references to the sequence. */
3259 full_seq.length += 1;
3260 full_seq.object_a = object_a;
3261 full_seq.object_b = object_b;
3262
3263 /* If the enclosing objects are structures (and thus have the
3264 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3265 if (TREE_CODE (type_a) == RECORD_TYPE)
3266 struct_seq = full_seq;
3267
3268 /* Move to the next containing reference for both A and B. */
3269 ref_a = object_a;
3270 ref_b = object_b;
3271 index_a += 1;
3272 index_b += 1;
3273 continue;
3274 }
3275
3276 /* Otherwise try to make the type sizes converge by stepping the reference with the smaller type out to its enclosing object. */
3277 if (!COMPLETE_TYPE_P (type_a)
3278 || !COMPLETE_TYPE_P (type_b)
3279 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3280 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3281 break;
3282
3283 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3284 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3285 if (size_a <= size_b)
3286 {
3287 index_a += 1;
3288 ref_a = object_a;
3289 }
3290 if (size_b <= size_a)
3291 {
3292 index_b += 1;
3293 ref_b = object_b;
3294 }
3295 }
3296
3297 /* See whether FULL_SEQ ends at the base and whether the two bases
3298 are equal. We do not care about TBAA or alignment info so we can
3299 use OEP_ADDRESS_OF to avoid false negatives. */
3300 tree base_a = indices_a->base_object;
3301 tree base_b = indices_b->base_object;
3302 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3303 && full_seq.start_b + full_seq.length == num_dimensions_b
3304 && (indices_a->unconstrained_base
3305 == indices_b->unconstrained_base)
3306 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3307 && (types_compatible_p (TREE_TYPE (base_a),
3308 TREE_TYPE (base_b))
3309 || (!base_supports_access_fn_components_p (base_a)
3310 && !base_supports_access_fn_components_p (base_b)
3311 && operand_equal_p
3312 (TYPE_SIZE (TREE_TYPE (base_a)),
3313 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3314 && (!loop_nest.exists ()
3315 || (object_address_invariant_in_loop_p
3316 (loop_nest[0], base_a))));
3317
3318 /* If the bases are the same, we can include the base variation too.
3319 E.g. the b accesses in:
3320
3321 for (int i = 0; i < n; ++i)
3322 b[i + 4][0] = b[i][0];
3323
3324 have a definite dependence distance of 4, while for:
3325
3326 for (int i = 0; i < n; ++i)
3327 a[i + 4][0] = b[i][0];
3328
3329 the dependence distance depends on the gap between a and b.
3330
3331 If the bases are different then we can only rely on the sequence
3332 rooted at a structure access, since arrays are allowed to overlap
3333 arbitrarily and change shape arbitrarily. E.g. we treat this as
3334 valid code:
3335
3336 int a[256];
3337 ...
3338 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3339
3340 where two lvalues with the same int[4][3] type overlap, and where
3341 both lvalues are distinct from the object's declared type. */
3342 if (same_base_p)
3343 {
3344 if (indices_a->unconstrained_base)
3345 full_seq.length += 1;
3346 }
3347 else
3348 full_seq = struct_seq;
3349
3350 /* Punt if we didn't find a suitable sequence. */
3351 if (full_seq.length == 0)
3352 {
3353 if (use_alt_indices
3354 || (TREE_CODE (DR_REF (a)) == MEM_REF
3355 && TREE_CODE (DR_REF (b)) == MEM_REF)
3356 || may_be_nonaddressable_p (DR_REF (a))
3357 || may_be_nonaddressable_p (DR_REF (b)))
3358 {
3359 /* Fully exhausted possibilities. */
3360 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3361 return res;
3362 }
3363
3364 /* Try evaluating both DRs as dereferences of pointers. */
3365 if (!a->alt_indices.base_object
3366 && TREE_CODE (DR_REF (a)) != MEM_REF)
3367 {
3368 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3369 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3370 build_int_cst
3371 (reference_alias_ptr_type (DR_REF (a)), 0));
3372 dr_analyze_indices (&a->alt_indices, alt_ref,
3373 loop_preheader_edge (loop_nest[0]),
3374 loop_containing_stmt (DR_STMT (a)));
3375 }
3376 if (!b->alt_indices.base_object
3377 && TREE_CODE (DR_REF (b)) != MEM_REF)
3378 {
3379 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3380 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3381 build_int_cst
3382 (reference_alias_ptr_type (DR_REF (b)), 0));
3383 dr_analyze_indices (&b->alt_indices, alt_ref,
3384 loop_preheader_edge (loop_nest[0]),
3385 loop_containing_stmt (DR_STMT (b)));
3386 }
3387 return initialize_data_dependence_relation (res, loop_nest, true);
3388 }
3389
3390 if (!same_base_p)
3391 {
3392 /* Partial overlap is possible for different bases when strict aliasing
3393 is not in effect. It's also possible if either base involves a union
3394 access; e.g. for:
3395
3396 struct s1 { int a[2]; };
3397 struct s2 { struct s1 b; int c; };
3398 struct s3 { int d; struct s1 e; };
3399 union u { struct s2 f; struct s3 g; } *p, *q;
3400
3401 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3402 "p->g.e" (base "p->g") and might partially overlap the s1 at
3403 "q->g.e" (base "q->g"). */
3404 if (!flag_strict_aliasing
3405 || ref_contains_union_access_p (full_seq.object_a)
3406 || ref_contains_union_access_p (full_seq.object_b))
3407 {
3408 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3409 return res;
3410 }
3411
3412 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3413 if (!loop_nest.exists ()
3414 || (object_address_invariant_in_loop_p (loop_nest[0],
3415 full_seq.object_a)
3416 && object_address_invariant_in_loop_p (loop_nest[0],
3417 full_seq.object_b)))
3418 {
3419 DDR_OBJECT_A (res) = full_seq.object_a;
3420 DDR_OBJECT_B (res) = full_seq.object_b;
3421 }
3422 }
3423
3424 DDR_AFFINE_P (res) = true;
3425 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3426 DDR_SUBSCRIPTS (res).create (full_seq.length);
3427 DDR_LOOP_NEST (res) = loop_nest;
3428 DDR_SELF_REFERENCE (res) = false;
3429
3430 for (i = 0; i < full_seq.length; ++i)
3431 {
3432 struct subscript *subscript;
3433
3434 subscript = XNEW (struct subscript);
3435 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3436 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3437 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3438 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3439 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3440 SUB_DISTANCE (subscript) = chrec_dont_know;
3441 DDR_SUBSCRIPTS (res).safe_push (subscript);
3442 }
3443
3444 return res;
3445 }
3446
3447 /* Initialize a data dependence relation between data accesses A and
3448 B. NB_LOOPS is the number of loops surrounding the references: the
3449 size of the classic distance/direction vectors. */
3450
3451 struct data_dependence_relation *
3452 initialize_data_dependence_relation (struct data_reference *a,
3453 struct data_reference *b,
3454 vec<loop_p> loop_nest)
3455 {
3456 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3457 DDR_A (res) = a;
3458 DDR_B (res) = b;
3459 DDR_LOOP_NEST (res).create (0);
3460 DDR_SUBSCRIPTS (res).create (0);
3461 DDR_DIR_VECTS (res).create (0);
3462 DDR_DIST_VECTS (res).create (0);
3463
3464 if (a == NULL || b == NULL)
3465 {
3466 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3467 return res;
3468 }
3469
3470 /* If the data references do not alias, then they are independent. */
3471 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3472 {
3473 DDR_ARE_DEPENDENT (res) = chrec_known;
3474 return res;
3475 }
3476
3477 return initialize_data_dependence_relation (res, loop_nest, false);
3478 }
3479
3480
3481 /* Frees memory used by the conflict function F. */
3482
3483 static void
3484 free_conflict_function (conflict_function *f)
3485 {
3486 unsigned i;
3487
3488 if (CF_NONTRIVIAL_P (f))
3489 {
3490 for (i = 0; i < f->n; i++)
3491 affine_fn_free (f->fns[i]);
3492 }
3493 free (f);
3494 }
3495
3496 /* Frees memory used by SUBSCRIPTS. */
3497
3498 static void
3499 free_subscripts (vec<subscript_p> subscripts)
3500 {
3501 for (subscript_p s : subscripts)
3502 {
3503 free_conflict_function (s->conflicting_iterations_in_a);
3504 free_conflict_function (s->conflicting_iterations_in_b);
3505 free (s);
3506 }
3507 subscripts.release ();
3508 }
3509
3510 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3511 description. */
3512
3513 static inline void
3514 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3515 tree chrec)
3516 {
3517 DDR_ARE_DEPENDENT (ddr) = chrec;
3518 free_subscripts (DDR_SUBSCRIPTS (ddr));
3519 DDR_SUBSCRIPTS (ddr).create (0);
3520 }
3521
3522 /* The dependence relation DDR cannot be represented by a distance
3523 vector. */
3524
3525 static inline void
3526 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3527 {
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 fprintf (dump_file, "(Dependence relation cannot be represented by a distance vector.)\n");
3530
3531 DDR_AFFINE_P (ddr) = false;
3532 }
3533
3534 \f
3535
3536 /* This section contains the classic Banerjee tests. */
3537
3538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3539 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3540
3541 static inline bool
3542 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3543 {
3544 return (evolution_function_is_constant_p (chrec_a)
3545 && evolution_function_is_constant_p (chrec_b));
3546 }
3547
3548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3549 variable, i.e., if the SIV (Single Index Variable) test is true. */
3550
3551 static bool
3552 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3553 {
3554 if ((evolution_function_is_constant_p (chrec_a)
3555 && evolution_function_is_univariate_p (chrec_b))
3556 || (evolution_function_is_constant_p (chrec_b)
3557 && evolution_function_is_univariate_p (chrec_a)))
3558 return true;
3559
3560 if (evolution_function_is_univariate_p (chrec_a)
3561 && evolution_function_is_univariate_p (chrec_b))
3562 {
3563 switch (TREE_CODE (chrec_a))
3564 {
3565 case POLYNOMIAL_CHREC:
3566 switch (TREE_CODE (chrec_b))
3567 {
3568 case POLYNOMIAL_CHREC:
3569 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3570 return false;
3571 /* FALLTHRU */
3572
3573 default:
3574 return true;
3575 }
3576
3577 default:
3578 return true;
3579 }
3580 }
3581
3582 return false;
3583 }
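
/* The following is a standalone toy sketch (not part of this file, and
   not using GCC's chrec trees) of the ZIV/SIV/MIV classification that
   ziv_subscript_p and siv_subscript_p implement above.  All names and
   the toy_fn model "a + b*i + c*j" are hypothetical illustrations.

#include <cstdio>

struct toy_fn { long a, b, c; };	// models a + b*i + c*j

static bool constant_p (toy_fn f) { return f.b == 0 && f.c == 0; }
static int nb_vars (toy_fn f) { return (f.b != 0) + (f.c != 0); }

// ZIV when both subscripts are constant, SIV when together they use a
// single index variable, MIV otherwise.
static const char *
classify (toy_fn fa, toy_fn fb)
{
  if (constant_p (fa) && constant_p (fb))
    return "ZIV";
  bool same_var = !((fa.b != 0 && fb.c != 0) || (fa.c != 0 && fb.b != 0));
  if (nb_vars (fa) <= 1 && nb_vars (fb) <= 1 && same_var)
    return "SIV";
  return "MIV";
}

int main ()
{
  printf ("%s\n", classify ({3, 0, 0}, {5, 0, 0}));	// A[3] vs. A[5]: ZIV
  printf ("%s\n", classify ({3, 0, 0}, {0, 1, 0}));	// A[3] vs. A[i]: SIV
  printf ("%s\n", classify ({0, 1, 0}, {0, 0, 1}));	// A[i] vs. A[j]: MIV
  return 0;
}
*/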
3584
3585 /* Creates a conflict function with N dimensions. The affine functions
3586 in each dimension follow. */
3587
3588 static conflict_function *
3589 conflict_fn (unsigned n, ...)
3590 {
3591 unsigned i;
3592 conflict_function *ret = XCNEW (conflict_function);
3593 va_list ap;
3594
3595 gcc_assert (n > 0 && n <= MAX_DIM);
3596 va_start (ap, n);
3597
3598 ret->n = n;
3599 for (i = 0; i < n; i++)
3600 ret->fns[i] = va_arg (ap, affine_fn);
3601 va_end (ap);
3602
3603 return ret;
3604 }
3605
3606 /* Returns constant affine function with value CST. */
3607
3608 static affine_fn
3609 affine_fn_cst (tree cst)
3610 {
3611 affine_fn fn;
3612 fn.create (1);
3613 fn.quick_push (cst);
3614 return fn;
3615 }
3616
3617 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3618
3619 static affine_fn
3620 affine_fn_univar (tree cst, unsigned dim, tree coef)
3621 {
3622 affine_fn fn;
3623 fn.create (dim + 1);
3624 unsigned i;
3625
3626 gcc_assert (dim > 0);
3627 fn.quick_push (cst);
3628 for (i = 1; i < dim; i++)
3629 fn.quick_push (integer_zero_node);
3630 fn.quick_push (coef);
3631 return fn;
3632 }
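
/* Standalone toy sketch of how affine functions are represented by the
   helpers above: a coefficient vector whose entry 0 is the constant
   term and whose entry K is the coefficient of the K-th index variable.
   The toy_* names are hypothetical; std::vector<long> stands in for the
   affine_fn vector of trees.

#include <cstdio>
#include <vector>

typedef std::vector<long> toy_affine_fn;

static toy_affine_fn
toy_affine_fn_cst (long cst)
{
  return { cst };
}

static toy_affine_fn
toy_affine_fn_univar (long cst, unsigned dim, long coef)
{
  toy_affine_fn fn (dim + 1, 0);	// cst, zeros, then coef at DIM
  fn[0] = cst;
  fn[dim] = coef;
  return fn;
}

// Evaluate FN at point X: fn[0] + sum_k fn[k] * x[k-1].
static long
toy_affine_fn_eval (const toy_affine_fn &fn, const std::vector<long> &x)
{
  long v = fn[0];
  for (unsigned k = 1; k < fn.size (); k++)
    v += fn[k] * x[k - 1];
  return v;
}

int main ()
{
  toy_affine_fn f = toy_affine_fn_univar (3, 2, 5);	// 3 + 5 * x_2
  printf ("%ld\n", toy_affine_fn_eval (f, {7, 2}));	// 3 + 5*2 = 13
  printf ("%ld\n", toy_affine_fn_eval (toy_affine_fn_cst (4), {7, 2}));
  return 0;
}
*/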
3633
3634 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3635 *OVERLAPS_B are initialized to the functions that describe the
3636 relation between the elements accessed twice by CHREC_A and
3637 CHREC_B. For k >= 0, the following property is verified:
3638
3639 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3640
3641 static void
3642 analyze_ziv_subscript (tree chrec_a,
3643 tree chrec_b,
3644 conflict_function **overlaps_a,
3645 conflict_function **overlaps_b,
3646 tree *last_conflicts)
3647 {
3648 tree type, difference;
3649 dependence_stats.num_ziv++;
3650
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3652 fprintf (dump_file, "(analyze_ziv_subscript \n");
3653
3654 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3655 chrec_a = chrec_convert (type, chrec_a, NULL);
3656 chrec_b = chrec_convert (type, chrec_b, NULL);
3657 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3658
3659 switch (TREE_CODE (difference))
3660 {
3661 case INTEGER_CST:
3662 if (integer_zerop (difference))
3663 {
3664 /* The difference is equal to zero: the accessed index
3665 overlaps for each iteration in the loop. */
3666 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3667 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3668 *last_conflicts = chrec_dont_know;
3669 dependence_stats.num_ziv_dependent++;
3670 }
3671 else
3672 {
3673 /* The accesses do not overlap. */
3674 *overlaps_a = conflict_fn_no_dependence ();
3675 *overlaps_b = conflict_fn_no_dependence ();
3676 *last_conflicts = integer_zero_node;
3677 dependence_stats.num_ziv_independent++;
3678 }
3679 break;
3680
3681 default:
3682 /* We're not sure whether the indexes overlap. For the moment,
3683 conservatively answer "don't know". */
3684 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3686
3687 *overlaps_a = conflict_fn_not_known ();
3688 *overlaps_b = conflict_fn_not_known ();
3689 *last_conflicts = chrec_dont_know;
3690 dependence_stats.num_ziv_unimplemented++;
3691 break;
3692 }
3693
3694 if (dump_file && (dump_flags & TDF_DETAILS))
3695 fprintf (dump_file, ")\n");
3696 }
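
/* Standalone toy sketch of the ZIV decision above (hypothetical names,
   plain longs instead of chrecs): two loop-invariant subscripts either
   conflict on every iteration or never.

#include <cstdio>

static void
toy_ziv_test (long c1, long c2)
{
  if (c1 == c2)
    printf ("dependent: same element on every iteration\n");
  else
    printf ("independent: the accesses never overlap\n");
}

int main ()
{
  toy_ziv_test (3, 3);		// A[3] vs. A[3]
  toy_ziv_test (3, 4);		// A[3] vs. A[4]
  return 0;
}
*/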
3697
3698 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3699 and only if it fits the unsigned int type. If this is not the case, or
3700 the bound on the number of iterations of LOOP could not be derived, returns
3701 chrec_dont_know. */
3702
3703 static tree
3704 max_stmt_executions_tree (class loop *loop)
3705 {
3706 widest_int nit;
3707
3708 if (!max_stmt_executions (loop, &nit))
3709 return chrec_dont_know;
3710
3711 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3712 return chrec_dont_know;
3713
3714 return wide_int_to_tree (unsigned_type_node, nit);
3715 }
3716
3717 /* Determine whether the CHREC is always positive/negative. If the expression
3718 cannot be statically analyzed, return false; otherwise set the answer in
3719 VALUE. */
3720
3721 static bool
3722 chrec_is_positive (tree chrec, bool *value)
3723 {
3724 bool value0, value1, value2;
3725 tree end_value, nb_iter;
3726
3727 switch (TREE_CODE (chrec))
3728 {
3729 case POLYNOMIAL_CHREC:
3730 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3731 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3732 return false;
3733
3734 /* FIXME -- overflows. */
3735 if (value0 == value1)
3736 {
3737 *value = value0;
3738 return true;
3739 }
3740
3741 /* Otherwise the chrec has the form "{-197, +, 2}_1", and the
3742 proof consists of showing that the sign never
3743 changes during the execution of the loop, from 0 to
3744 loop->nb_iterations. */
3745 if (!evolution_function_is_affine_p (chrec))
3746 return false;
3747
3748 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3749 if (chrec_contains_undetermined (nb_iter))
3750 return false;
3751
3752 #if 0
3753 /* TODO -- If the test is after the exit, we may decrease the number of
3754 iterations by one. */
3755 if (after_exit)
3756 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3757 #endif
3758
3759 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3760
3761 if (!chrec_is_positive (end_value, &value2))
3762 return false;
3763
3764 *value = value0;
3765 return value0 == value1;
3766
3767 case INTEGER_CST:
3768 switch (tree_int_cst_sgn (chrec))
3769 {
3770 case -1:
3771 *value = false;
3772 break;
3773 case 1:
3774 *value = true;
3775 break;
3776 default:
3777 return false;
3778 }
3779 return true;
3780
3781 default:
3782 return false;
3783 }
3784 }
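
/* Standalone toy sketch of the affine case of chrec_is_positive above:
   {base, +, step} is monotone, so it keeps one sign over iterations
   0 .. nb_iter iff the initial value and the end value
   base + step * nb_iter have the same (nonzero) sign.  Hypothetical
   names; overflow is ignored here, as the FIXME above notes.

#include <cstdio>

static bool
toy_chrec_is_positive (long base, long step, long nb_iter, bool *value)
{
  long end = base + step * nb_iter;
  if (base == 0 || end == 0)		// sign not strictly determined
    return false;
  if ((base > 0) != (end > 0))		// sign flips inside the loop
    return false;
  *value = base > 0;
  return true;
}

int main ()
{
  bool positive;
  // {-197, +, 2}_1 over 50 iterations ends at -97: provably negative.
  if (toy_chrec_is_positive (-197, 2, 50, &positive))
    printf ("always %s\n", positive ? "positive" : "negative");
  // Over 200 iterations it ends at 203, so the sign changes.
  if (!toy_chrec_is_positive (-197, 2, 200, &positive))
    printf ("sign changes during the loop\n");
  return 0;
}
*/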
3785
3786
3787 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3788 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3789 *OVERLAPS_B are initialized to the functions that describe the
3790 relation between the elements accessed twice by CHREC_A and
3791 CHREC_B. For k >= 0, the following property is verified:
3792
3793 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3794
3795 static void
3796 analyze_siv_subscript_cst_affine (tree chrec_a,
3797 tree chrec_b,
3798 conflict_function **overlaps_a,
3799 conflict_function **overlaps_b,
3800 tree *last_conflicts)
3801 {
3802 bool value0, value1, value2;
3803 tree type, difference, tmp;
3804
3805 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3806 chrec_a = chrec_convert (type, chrec_a, NULL);
3807 chrec_b = chrec_convert (type, chrec_b, NULL);
3808 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3809
3810 /* Special case overlap in the first iteration. */
3811 if (integer_zerop (difference))
3812 {
3813 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3814 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3815 *last_conflicts = integer_one_node;
3816 return;
3817 }
3818
3819 if (!chrec_is_positive (initial_condition (difference), &value0))
3820 {
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3823
3824 dependence_stats.num_siv_unimplemented++;
3825 *overlaps_a = conflict_fn_not_known ();
3826 *overlaps_b = conflict_fn_not_known ();
3827 *last_conflicts = chrec_dont_know;
3828 return;
3829 }
3830 else
3831 {
3832 if (value0 == false)
3833 {
3834 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3835 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3836 {
3837 if (dump_file && (dump_flags & TDF_DETAILS))
3838 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3839
3840 *overlaps_a = conflict_fn_not_known ();
3841 *overlaps_b = conflict_fn_not_known ();
3842 *last_conflicts = chrec_dont_know;
3843 dependence_stats.num_siv_unimplemented++;
3844 return;
3845 }
3846 else
3847 {
3848 if (value1 == true)
3849 {
3850 /* Example:
3851 chrec_a = 12
3852 chrec_b = {10, +, 1}
3853 */
3854
3855 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3856 {
3857 HOST_WIDE_INT numiter;
3858 class loop *loop = get_chrec_loop (chrec_b);
3859
3860 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3861 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3862 fold_build1 (ABS_EXPR, type, difference),
3863 CHREC_RIGHT (chrec_b));
3864 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3865 *last_conflicts = integer_one_node;
3866
3867
3868 /* Perform weak-zero siv test to see if overlap is
3869 outside the loop bounds. */
3870 numiter = max_stmt_executions_int (loop);
3871
3872 if (numiter >= 0
3873 && compare_tree_int (tmp, numiter) > 0)
3874 {
3875 free_conflict_function (*overlaps_a);
3876 free_conflict_function (*overlaps_b);
3877 *overlaps_a = conflict_fn_no_dependence ();
3878 *overlaps_b = conflict_fn_no_dependence ();
3879 *last_conflicts = integer_zero_node;
3880 dependence_stats.num_siv_independent++;
3881 return;
3882 }
3883 dependence_stats.num_siv_dependent++;
3884 return;
3885 }
3886
3887 /* When the step does not divide the difference, there are
3888 no overlaps. */
3889 else
3890 {
3891 *overlaps_a = conflict_fn_no_dependence ();
3892 *overlaps_b = conflict_fn_no_dependence ();
3893 *last_conflicts = integer_zero_node;
3894 dependence_stats.num_siv_independent++;
3895 return;
3896 }
3897 }
3898
3899 else
3900 {
3901 /* Example:
3902 chrec_a = 12
3903 chrec_b = {10, +, -1}
3904
3905 In this case, chrec_a will not overlap with chrec_b. */
3906 *overlaps_a = conflict_fn_no_dependence ();
3907 *overlaps_b = conflict_fn_no_dependence ();
3908 *last_conflicts = integer_zero_node;
3909 dependence_stats.num_siv_independent++;
3910 return;
3911 }
3912 }
3913 }
3914 else
3915 {
3916 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3917 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3918 {
3919 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3921
3922 *overlaps_a = conflict_fn_not_known ();
3923 *overlaps_b = conflict_fn_not_known ();
3924 *last_conflicts = chrec_dont_know;
3925 dependence_stats.num_siv_unimplemented++;
3926 return;
3927 }
3928 else
3929 {
3930 if (value2 == false)
3931 {
3932 /* Example:
3933 chrec_a = 3
3934 chrec_b = {10, +, -1}
3935 */
3936 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3937 {
3938 HOST_WIDE_INT numiter;
3939 class loop *loop = get_chrec_loop (chrec_b);
3940
3941 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3942 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3943 CHREC_RIGHT (chrec_b));
3944 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3945 *last_conflicts = integer_one_node;
3946
3947 /* Perform weak-zero siv test to see if overlap is
3948 outside the loop bounds. */
3949 numiter = max_stmt_executions_int (loop);
3950
3951 if (numiter >= 0
3952 && compare_tree_int (tmp, numiter) > 0)
3953 {
3954 free_conflict_function (*overlaps_a);
3955 free_conflict_function (*overlaps_b);
3956 *overlaps_a = conflict_fn_no_dependence ();
3957 *overlaps_b = conflict_fn_no_dependence ();
3958 *last_conflicts = integer_zero_node;
3959 dependence_stats.num_siv_independent++;
3960 return;
3961 }
3962 dependence_stats.num_siv_dependent++;
3963 return;
3964 }
3965
3966 /* When the step does not divide the difference, there
3967 are no overlaps. */
3968 else
3969 {
3970 *overlaps_a = conflict_fn_no_dependence ();
3971 *overlaps_b = conflict_fn_no_dependence ();
3972 *last_conflicts = integer_zero_node;
3973 dependence_stats.num_siv_independent++;
3974 return;
3975 }
3976 }
3977 else
3978 {
3979 /* Example:
3980 chrec_a = 3
3981 chrec_b = {4, +, 1}
3982
3983 In this case, chrec_a will not overlap with chrec_b. */
3984 *overlaps_a = conflict_fn_no_dependence ();
3985 *overlaps_b = conflict_fn_no_dependence ();
3986 *last_conflicts = integer_zero_node;
3987 dependence_stats.num_siv_independent++;
3988 return;
3989 }
3990 }
3991 }
3992 }
3993 }
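
/* Standalone toy sketch of the constant vs. affine SIV test above
   (hypothetical names, longs instead of chrecs): chrec_a = C conflicts
   with chrec_b = {BASE, +, STEP} iff STEP divides C - BASE and the
   resulting iteration lies within the loop bound NITER; this folds the
   sign analysis and the weak-zero test above into one divisibility
   check and one range check.

#include <cstdio>

static void
toy_siv_cst_affine (long c, long base, long step, long niter)
{
  long diff = c - base;
  if (step == 0 || diff % step != 0
      || diff / step < 0 || diff / step > niter)
    printf ("independent\n");
  else
    printf ("conflict at iteration %ld\n", diff / step);
}

int main ()
{
  toy_siv_cst_affine (12, 10, 1, 100);	// 12 vs. {10, +, 1}: k = 2
  toy_siv_cst_affine (12, 10, -1, 100);	// 12 vs. {10, +, -1}: none
  toy_siv_cst_affine (3, 10, -1, 100);	// 3 vs. {10, +, -1}: k = 7
  toy_siv_cst_affine (12, 10, 1, 1);	// overlap beyond the bound
  return 0;
}
*/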
3994
3995 /* Helper recursive function for initializing the matrix A. Returns
3996 the initial value of CHREC. */
3997
3998 static tree
3999 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4000 {
4001 gcc_assert (chrec);
4002
4003 switch (TREE_CODE (chrec))
4004 {
4005 case POLYNOMIAL_CHREC:
4006 HOST_WIDE_INT chrec_right;
4007 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4008 return chrec_dont_know;
4009 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4010 /* We want to be able to negate without overflow. */
4011 if (chrec_right == HOST_WIDE_INT_MIN)
4012 return chrec_dont_know;
4013 A[index][0] = mult * chrec_right;
4014 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4015
4016 case PLUS_EXPR:
4017 case MULT_EXPR:
4018 case MINUS_EXPR:
4019 {
4020 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4021 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4022
4023 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4024 }
4025
4026 CASE_CONVERT:
4027 {
4028 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4029 return chrec_convert (chrec_type (chrec), op, NULL);
4030 }
4031
4032 case BIT_NOT_EXPR:
4033 {
4034 /* Handle ~X as -1 - X. */
4035 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4036 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4037 build_int_cst (TREE_TYPE (chrec), -1), op);
4038 }
4039
4040 case INTEGER_CST:
4041 return chrec;
4042
4043 default:
4044 gcc_unreachable ();
4045 return NULL_TREE;
4046 }
4047 }
4048
4049 #define FLOOR_DIV(x,y) ((x) / (y))
4050
4051 /* Solves the special case of the Diophantine equation:
4052 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4053
4054 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4055 number of iterations that loops X and Y run. The overlaps will be
4056 constructed as evolutions in dimension DIM. */
4057
4058 static void
4059 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4060 HOST_WIDE_INT step_a,
4061 HOST_WIDE_INT step_b,
4062 affine_fn *overlaps_a,
4063 affine_fn *overlaps_b,
4064 tree *last_conflicts, int dim)
4065 {
4066 if (((step_a > 0 && step_b > 0)
4067 || (step_a < 0 && step_b < 0)))
4068 {
4069 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4070 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4071
4072 gcd_steps_a_b = gcd (step_a, step_b);
4073 step_overlaps_a = step_b / gcd_steps_a_b;
4074 step_overlaps_b = step_a / gcd_steps_a_b;
4075
4076 if (niter > 0)
4077 {
4078 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4079 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4080 last_conflict = tau2;
4081 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4082 }
4083 else
4084 *last_conflicts = chrec_dont_know;
4085
4086 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4087 build_int_cst (NULL_TREE,
4088 step_overlaps_a));
4089 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4090 build_int_cst (NULL_TREE,
4091 step_overlaps_b));
4092 }
4093
4094 else
4095 {
4096 *overlaps_a = affine_fn_cst (integer_zero_node);
4097 *overlaps_b = affine_fn_cst (integer_zero_node);
4098 *last_conflicts = integer_zero_node;
4099 }
4100 }
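
/* Standalone toy run (C++17 for std::gcd; all names hypothetical) of
   the same-sign case above: {0, +, STEP_A}_x and {0, +, STEP_B}_y meet
   exactly when STEP_A * i == STEP_B * j, i.e. at i = t * STEP_B/g and
   j = t * STEP_A/g with g = gcd (STEP_A, STEP_B), and the last
   in-bounds t is the minimum of the two floor divisions of NITER.

#include <cstdio>
#include <numeric>

int main ()
{
  long step_a = 4, step_b = 6, niter = 100;
  long g = std::gcd (step_a, step_b);
  long step_overlaps_a = step_b / g;	// evolution of OVERLAPS_A
  long step_overlaps_b = step_a / g;	// evolution of OVERLAPS_B
  long last = niter / step_overlaps_a;
  if (niter / step_overlaps_b < last)
    last = niter / step_overlaps_b;

  printf ("overlaps_a = {0, +, %ld}, overlaps_b = {0, +, %ld}, "
	  "last conflict t = %ld\n",
	  step_overlaps_a, step_overlaps_b, last);
  // Sanity check: step_a * (t * 3) == step_b * (t * 2) for all t.
  for (long t = 0; t <= 3; t++)
    printf ("t=%ld: element %ld == %ld\n", t,
	    step_a * t * step_overlaps_a, step_b * t * step_overlaps_b);
  return 0;
}
*/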
4101
4102 /* Solves the special case of a Diophantine equation where CHREC_A is
4103 an affine bivariate function, and CHREC_B is an affine univariate
4104 function. For example,
4105
4106 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4107
4108 has the following overlapping functions:
4109
4110 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4111 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4112 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4113
4114 FORNOW: This is a specialized implementation for a case occurring in
4115 a common benchmark. Implement the general algorithm. */
4116
4117 static void
4118 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4119 conflict_function **overlaps_a,
4120 conflict_function **overlaps_b,
4121 tree *last_conflicts)
4122 {
4123 bool xz_p, yz_p, xyz_p;
4124 HOST_WIDE_INT step_x, step_y, step_z;
4125 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4126 affine_fn overlaps_a_xz, overlaps_b_xz;
4127 affine_fn overlaps_a_yz, overlaps_b_yz;
4128 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4129 affine_fn ova1, ova2, ovb;
4130 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4131
4132 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4133 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4134 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4135
4136 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4137 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4138 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4139
4140 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4141 {
4142 if (dump_file && (dump_flags & TDF_DETAILS))
4143 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4144
4145 *overlaps_a = conflict_fn_not_known ();
4146 *overlaps_b = conflict_fn_not_known ();
4147 *last_conflicts = chrec_dont_know;
4148 return;
4149 }
4150
4151 niter = MIN (niter_x, niter_z);
4152 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4153 &overlaps_a_xz,
4154 &overlaps_b_xz,
4155 &last_conflicts_xz, 1);
4156 niter = MIN (niter_y, niter_z);
4157 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4158 &overlaps_a_yz,
4159 &overlaps_b_yz,
4160 &last_conflicts_yz, 2);
4161 niter = MIN (niter_x, niter_z);
4162 niter = MIN (niter_y, niter);
4163 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4164 &overlaps_a_xyz,
4165 &overlaps_b_xyz,
4166 &last_conflicts_xyz, 3);
4167
4168 xz_p = !integer_zerop (last_conflicts_xz);
4169 yz_p = !integer_zerop (last_conflicts_yz);
4170 xyz_p = !integer_zerop (last_conflicts_xyz);
4171
4172 if (xz_p || yz_p || xyz_p)
4173 {
4174 ova1 = affine_fn_cst (integer_zero_node);
4175 ova2 = affine_fn_cst (integer_zero_node);
4176 ovb = affine_fn_cst (integer_zero_node);
4177 if (xz_p)
4178 {
4179 affine_fn t0 = ova1;
4180 affine_fn t2 = ovb;
4181
4182 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4183 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4184 affine_fn_free (t0);
4185 affine_fn_free (t2);
4186 *last_conflicts = last_conflicts_xz;
4187 }
4188 if (yz_p)
4189 {
4190 affine_fn t0 = ova2;
4191 affine_fn t2 = ovb;
4192
4193 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4194 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4195 affine_fn_free (t0);
4196 affine_fn_free (t2);
4197 *last_conflicts = last_conflicts_yz;
4198 }
4199 if (xyz_p)
4200 {
4201 affine_fn t0 = ova1;
4202 affine_fn t2 = ova2;
4203 affine_fn t4 = ovb;
4204
4205 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4206 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4207 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4208 affine_fn_free (t0);
4209 affine_fn_free (t2);
4210 affine_fn_free (t4);
4211 *last_conflicts = last_conflicts_xyz;
4212 }
4213 *overlaps_a = conflict_fn (2, ova1, ova2);
4214 *overlaps_b = conflict_fn (1, ovb);
4215 }
4216 else
4217 {
4218 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4219 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4220 *last_conflicts = integer_zero_node;
4221 }
4222
4223 affine_fn_free (overlaps_a_xz);
4224 affine_fn_free (overlaps_b_xz);
4225 affine_fn_free (overlaps_a_yz);
4226 affine_fn_free (overlaps_b_yz);
4227 affine_fn_free (overlaps_a_xyz);
4228 affine_fn_free (overlaps_b_xyz);
4229 }
4230
4231 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4232
4233 static void
4234 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4235 int size)
4236 {
4237 memcpy (vec2, vec1, size * sizeof (*vec1));
4238 }
4239
4240 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4241
4242 static void
4243 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4244 int m, int n)
4245 {
4246 int i;
4247
4248 for (i = 0; i < m; i++)
4249 lambda_vector_copy (mat1[i], mat2[i], n);
4250 }
4251
4252 /* Store the SIZE x SIZE identity matrix in MAT. */
4253
4254 static void
4255 lambda_matrix_id (lambda_matrix mat, int size)
4256 {
4257 int i, j;
4258
4259 for (i = 0; i < size; i++)
4260 for (j = 0; j < size; j++)
4261 mat[i][j] = (i == j) ? 1 : 0;
4262 }
4263
4264 /* Return the index of the first nonzero element of vector VEC1 between
4265 START and N. We must have START <= N.
4266 Returns N if VEC1 is the zero vector. */
4267
4268 static int
4269 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4270 {
4271 int j = start;
4272 while (j < n && vec1[j] == 0)
4273 j++;
4274 return j;
4275 }
4276
4277 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4278 R2 = R2 + CONST1 * R1. */
4279
4280 static bool
4281 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4282 lambda_int const1)
4283 {
4284 int i;
4285
4286 if (const1 == 0)
4287 return true;
4288
4289 for (i = 0; i < n; i++)
4290 {
4291 bool ovf;
4292 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4293 if (ovf)
4294 return false;
4295 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4296 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4297 return false;
4298 mat[r2][i] = tem2;
4299 }
4300
4301 return true;
4302 }
4303
4304 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4305 and store the result in VEC2. */
4306
4307 static void
4308 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4309 int size, lambda_int const1)
4310 {
4311 int i;
4312
4313 if (const1 == 0)
4314 lambda_vector_clear (vec2, size);
4315 else
4316 for (i = 0; i < size; i++)
4317 vec2[i] = const1 * vec1[i];
4318 }
4319
4320 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4321
4322 static void
4323 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4324 int size)
4325 {
4326 lambda_vector_mult_const (vec1, vec2, size, -1);
4327 }
4328
4329 /* Negate row R1 of matrix MAT which has N columns. */
4330
4331 static void
4332 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4333 {
4334 lambda_vector_negate (mat[r1], mat[r1], n);
4335 }
4336
4337 /* Return true if two vectors are equal. */
4338
4339 static bool
4340 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4341 {
4342 int i;
4343 for (i = 0; i < size; i++)
4344 if (vec1[i] != vec2[i])
4345 return false;
4346 return true;
4347 }
4348
4349 /* Given an M x N integer matrix A, this function determines an M x
4350 M unimodular matrix U, and an M x N echelon matrix S such that
4351 "U.A = S". This decomposition is also known as "right Hermite".
4352
4353 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4354 Restructuring Compilers" Utpal Banerjee. */
4355
4356 static bool
4357 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4358 lambda_matrix S, lambda_matrix U)
4359 {
4360 int i, j, i0 = 0;
4361
4362 lambda_matrix_copy (A, S, m, n);
4363 lambda_matrix_id (U, m);
4364
4365 for (j = 0; j < n; j++)
4366 {
4367 if (lambda_vector_first_nz (S[j], m, i0) < m)
4368 {
4369 ++i0;
4370 for (i = m - 1; i >= i0; i--)
4371 {
4372 while (S[i][j] != 0)
4373 {
4374 lambda_int factor, a, b;
4375
4376 a = S[i-1][j];
4377 b = S[i][j];
4378 gcc_assert (a != HOST_WIDE_INT_MIN);
4379 factor = a / b;
4380
4381 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4382 return false;
4383 std::swap (S[i], S[i-1]);
4384
4385 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4386 return false;
4387 std::swap (U[i], U[i-1]);
4388 }
4389 }
4390 }
4391 }
4392
4393 return true;
4394 }
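
/* Standalone toy run (hypothetical names) of the decomposition above
   for the 2 x 1 case: the inner "row(i-1) -= factor * row(i), then
   swap" loop is Euclid's algorithm, and mirroring each row operation
   on U keeps the invariant U . A == S, so it ends with
   S = (gcd (a, b), 0) and a Bezout-style transform in U.

#include <cstdio>
#include <utility>

int main ()
{
  long a = 1335, b = 1336;
  long S[2] = { a, b };
  long U[2][2] = { { 1, 0 }, { 0, 1 } };	// identity

  while (S[1] != 0)
    {
      long factor = S[0] / S[1];
      S[0] -= factor * S[1];		// row0 -= factor * row1
      std::swap (S[0], S[1]);
      for (int j = 0; j < 2; j++)	// same row op on U
	U[0][j] -= factor * U[1][j];
      std::swap (U[0], U[1]);
    }

  printf ("gcd = %ld, U = [[%ld, %ld], [%ld, %ld]]\n",
	  S[0], U[0][0], U[0][1], U[1][0], U[1][1]);
  // U . A == S: the first row of U expresses the gcd as a combination.
  printf ("check: %ld*%ld + %ld*%ld = %ld\n",
	  U[0][0], a, U[0][1], b, U[0][0] * a + U[0][1] * b);
  return 0;
}
*/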
4395
4396 /* Determines the overlapping elements due to accesses CHREC_A and
4397 CHREC_B, which are affine functions. This function cannot handle
4398 symbolic evolution functions, i.e. when the initial conditions are
4399 parameters, because it uses lambda matrices of integers. */
4400
4401 static void
4402 analyze_subscript_affine_affine (tree chrec_a,
4403 tree chrec_b,
4404 conflict_function **overlaps_a,
4405 conflict_function **overlaps_b,
4406 tree *last_conflicts)
4407 {
4408 unsigned nb_vars_a, nb_vars_b, dim;
4409 lambda_int gamma, gcd_alpha_beta;
4410 lambda_matrix A, U, S;
4411 struct obstack scratch_obstack;
4412
4413 if (eq_evolutions_p (chrec_a, chrec_b))
4414 {
4415 /* The accessed index overlaps for each iteration in the
4416 loop. */
4417 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4418 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4419 *last_conflicts = chrec_dont_know;
4420 return;
4421 }
4422 if (dump_file && (dump_flags & TDF_DETAILS))
4423 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4424
4425 /* For determining the initial intersection, we have to solve a
4426 Diophantine equation. This is the most time-consuming part.
4427
4428 To answer the question "Is there a dependence?" we have
4429 to prove that there exists a solution to the Diophantine
4430 equation, and that the solution is in the iteration domain,
4431 i.e. the solution is positive or zero, and that the solution
4432 occurs before the upper bound loop.nb_iterations. Otherwise
4433 there is no dependence. This function outputs a description of
4434 the iterations that hold the intersections. */
4435
4436 nb_vars_a = nb_vars_in_chrec (chrec_a);
4437 nb_vars_b = nb_vars_in_chrec (chrec_b);
4438
4439 gcc_obstack_init (&scratch_obstack);
4440
4441 dim = nb_vars_a + nb_vars_b;
4442 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4443 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4444 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4445
4446 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4447 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4448 if (init_a == chrec_dont_know
4449 || init_b == chrec_dont_know)
4450 {
4451 if (dump_file && (dump_flags & TDF_DETAILS))
4452 fprintf (dump_file, "affine-affine test failed: "
4453 "representation issue.\n");
4454 *overlaps_a = conflict_fn_not_known ();
4455 *overlaps_b = conflict_fn_not_known ();
4456 *last_conflicts = chrec_dont_know;
4457 goto end_analyze_subs_aa;
4458 }
4459 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4460
4461 /* Don't do all the hard work of solving the Diophantine equation
4462 when we already know the solution: for example,
4463 | {3, +, 1}_1
4464 | {3, +, 4}_2
4465 | gamma = 3 - 3 = 0.
4466 Then the first overlap occurs during the first iterations:
4467 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4468 */
4469 if (gamma == 0)
4470 {
4471 if (nb_vars_a == 1 && nb_vars_b == 1)
4472 {
4473 HOST_WIDE_INT step_a, step_b;
4474 HOST_WIDE_INT niter, niter_a, niter_b;
4475 affine_fn ova, ovb;
4476
4477 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4478 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4479 niter = MIN (niter_a, niter_b);
4480 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4481 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4482
4483 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4484 &ova, &ovb,
4485 last_conflicts, 1);
4486 *overlaps_a = conflict_fn (1, ova);
4487 *overlaps_b = conflict_fn (1, ovb);
4488 }
4489
4490 else if (nb_vars_a == 2 && nb_vars_b == 1)
4491 compute_overlap_steps_for_affine_1_2
4492 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4493
4494 else if (nb_vars_a == 1 && nb_vars_b == 2)
4495 compute_overlap_steps_for_affine_1_2
4496 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4497
4498 else
4499 {
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4501 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4502 *overlaps_a = conflict_fn_not_known ();
4503 *overlaps_b = conflict_fn_not_known ();
4504 *last_conflicts = chrec_dont_know;
4505 }
4506 goto end_analyze_subs_aa;
4507 }
4508
4509 /* U.A = S */
4510 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4511 {
4512 *overlaps_a = conflict_fn_not_known ();
4513 *overlaps_b = conflict_fn_not_known ();
4514 *last_conflicts = chrec_dont_know;
4515 goto end_analyze_subs_aa;
4516 }
4517
4518 if (S[0][0] < 0)
4519 {
4520 S[0][0] *= -1;
4521 lambda_matrix_row_negate (U, dim, 0);
4522 }
4523 gcd_alpha_beta = S[0][0];
4524
4525 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4526 but that is quite a strange case. Instead of ICEing, answer
4527 "don't know". */
4528 if (gcd_alpha_beta == 0)
4529 {
4530 *overlaps_a = conflict_fn_not_known ();
4531 *overlaps_b = conflict_fn_not_known ();
4532 *last_conflicts = chrec_dont_know;
4533 goto end_analyze_subs_aa;
4534 }
4535
4536 /* The classic "gcd-test". */
4537 if (!int_divides_p (gcd_alpha_beta, gamma))
4538 {
4539 /* The "gcd-test" has determined that there is no integer
4540 solution, i.e. there is no dependence. */
4541 *overlaps_a = conflict_fn_no_dependence ();
4542 *overlaps_b = conflict_fn_no_dependence ();
4543 *last_conflicts = integer_zero_node;
4544 }
4545
4546 /* Both access functions are univariate. This includes SIV and MIV cases. */
4547 else if (nb_vars_a == 1 && nb_vars_b == 1)
4548 {
4549 /* Both functions should have the same evolution sign. */
4550 if (((A[0][0] > 0 && -A[1][0] > 0)
4551 || (A[0][0] < 0 && -A[1][0] < 0)))
4552 {
4553 /* The solutions are given by:
4554 |
4555 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4556 | [u21 u22] [y0]
4557
4558 For a given integer t. Using the following variables,
4559
4560 | i0 = u11 * gamma / gcd_alpha_beta
4561 | j0 = u12 * gamma / gcd_alpha_beta
4562 | i1 = u21
4563 | j1 = u22
4564
4565 the solutions are:
4566
4567 | x0 = i0 + i1 * t,
4568 | y0 = j0 + j1 * t. */
4569 HOST_WIDE_INT i0, j0, i1, j1;
4570
4571 i0 = U[0][0] * gamma / gcd_alpha_beta;
4572 j0 = U[0][1] * gamma / gcd_alpha_beta;
4573 i1 = U[1][0];
4574 j1 = U[1][1];
4575
4576 if ((i1 == 0 && i0 < 0)
4577 || (j1 == 0 && j0 < 0))
4578 {
4579 /* There is no solution.
4580 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4581 falls in here, but for the moment we don't look at the
4582 upper bound of the iteration domain. */
4583 *overlaps_a = conflict_fn_no_dependence ();
4584 *overlaps_b = conflict_fn_no_dependence ();
4585 *last_conflicts = integer_zero_node;
4586 goto end_analyze_subs_aa;
4587 }
4588
4589 if (i1 > 0 && j1 > 0)
4590 {
4591 HOST_WIDE_INT niter_a
4592 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4593 HOST_WIDE_INT niter_b
4594 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4595 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4596
4597 /* (X0, Y0) is a solution of the Diophantine equation:
4598 "chrec_a (X0) = chrec_b (Y0)". */
4599 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4600 CEIL (-j0, j1));
4601 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4602 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4603
4604 /* (X1, Y1) is the smallest positive solution of the equation
4605 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4606 first conflict occurs. */
4607 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4608 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4609 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4610
4611 if (niter > 0)
4612 {
4613 /* If the overlap occurs outside of the bounds of the
4614 loop, there is no dependence. */
4615 if (x1 >= niter_a || y1 >= niter_b)
4616 {
4617 *overlaps_a = conflict_fn_no_dependence ();
4618 *overlaps_b = conflict_fn_no_dependence ();
4619 *last_conflicts = integer_zero_node;
4620 goto end_analyze_subs_aa;
4621 }
4622
4623 /* max stmt executions can get quite large, avoid
4624 overflows by using wide ints here. */
4625 widest_int tau2
4626 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4627 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4628 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4629 if (wi::min_precision (last_conflict, SIGNED)
4630 <= TYPE_PRECISION (integer_type_node))
4631 *last_conflicts
4632 = build_int_cst (integer_type_node,
4633 last_conflict.to_shwi ());
4634 else
4635 *last_conflicts = chrec_dont_know;
4636 }
4637 else
4638 *last_conflicts = chrec_dont_know;
4639
4640 *overlaps_a
4641 = conflict_fn (1,
4642 affine_fn_univar (build_int_cst (NULL_TREE, x1),
4643 1,
4644 build_int_cst (NULL_TREE, i1)));
4645 *overlaps_b
4646 = conflict_fn (1,
4647 affine_fn_univar (build_int_cst (NULL_TREE, y1),
4648 1,
4649 build_int_cst (NULL_TREE, j1)));
4650 }
4651 else
4652 {
4653 /* FIXME: For the moment, the upper bound of the
4654 iteration domain for i and j is not checked. */
4655 if (dump_file && (dump_flags & TDF_DETAILS))
4656 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4657 *overlaps_a = conflict_fn_not_known ();
4658 *overlaps_b = conflict_fn_not_known ();
4659 *last_conflicts = chrec_dont_know;
4660 }
4661 }
4662 else
4663 {
4664 if (dump_file && (dump_flags & TDF_DETAILS))
4665 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4666 *overlaps_a = conflict_fn_not_known ();
4667 *overlaps_b = conflict_fn_not_known ();
4668 *last_conflicts = chrec_dont_know;
4669 }
4670 }
4671 else
4672 {
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4674 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4675 *overlaps_a = conflict_fn_not_known ();
4676 *overlaps_b = conflict_fn_not_known ();
4677 *last_conflicts = chrec_dont_know;
4678 }
4679
4680 end_analyze_subs_aa:
4681 obstack_free (&scratch_obstack, NULL);
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4683 {
4684 fprintf (dump_file, " (overlaps_a = ");
4685 dump_conflict_function (dump_file, *overlaps_a);
4686 fprintf (dump_file, ")\n (overlaps_b = ");
4687 dump_conflict_function (dump_file, *overlaps_b);
4688 fprintf (dump_file, "))\n");
4689 }
4690 }
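
/* Standalone toy sketch (C++17 std::gcd; hypothetical names) of the
   classic gcd-test applied above: {INIT_A, +, STEP_A} and
   {INIT_B, +, STEP_B} can only meet if the Diophantine equation
   STEP_A * i - STEP_B * j = INIT_B - INIT_A (= gamma) has an integer
   solution, which requires gcd (STEP_A, STEP_B) to divide gamma.

#include <cstdio>
#include <numeric>

static bool
toy_gcd_test (long init_a, long step_a, long init_b, long step_b)
{
  long gamma = init_b - init_a;
  return gamma % std::gcd (step_a, step_b) == 0;
}

int main ()
{
  // A[2*i] vs. A[2*i + 1]: gamma = 1, gcd = 2 -> independent.
  printf ("%s\n",
	  toy_gcd_test (0, 2, 1, 2) ? "maybe dependent" : "independent");
  // A[4*i] vs. A[6*j + 2]: gamma = 2, gcd = 2 -> solutions exist.
  printf ("%s\n",
	  toy_gcd_test (0, 4, 2, 6) ? "maybe dependent" : "independent");
  return 0;
}
*/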
4691
4692 /* Returns true when analyze_subscript_affine_affine can be used for
4693 determining the dependence relation between chrec_a and chrec_b,
4694 which contain symbols. This function modifies chrec_a and chrec_b
4695 so that the analysis result is the same and so that they no longer
4696 contain symbols, and can then safely be passed to the analyzer.
4697
4698 Example: The analysis of the following tuples of evolutions produces
4699 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4700 vs. {0, +, 1}_1:
4701
4702 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4703 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4704 */
4705
4706 static bool
4707 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4708 {
4709 tree diff, type, left_a, left_b, right_b;
4710
4711 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4712 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4713 /* FIXME: For the moment not handled. Might be refined later. */
4714 return false;
4715
4716 type = chrec_type (*chrec_a);
4717 left_a = CHREC_LEFT (*chrec_a);
4718 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4719 diff = chrec_fold_minus (type, left_a, left_b);
4720
4721 if (!evolution_function_is_constant_p (diff))
4722 return false;
4723
4724 if (dump_file && (dump_flags & TDF_DETAILS))
4725 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4726
4727 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4728 diff, CHREC_RIGHT (*chrec_a));
4729 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4730 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4731 build_int_cst (type, 0),
4732 right_b);
4733 return true;
4734 }
4735
4736 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4737 *OVERLAPS_B are initialized to the functions that describe the
4738 relation between the elements accessed twice by CHREC_A and
4739 CHREC_B. For k >= 0, the following property is verified:
4740
4741 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4742
4743 static void
4744 analyze_siv_subscript (tree chrec_a,
4745 tree chrec_b,
4746 conflict_function **overlaps_a,
4747 conflict_function **overlaps_b,
4748 tree *last_conflicts,
4749 int loop_nest_num)
4750 {
4751 dependence_stats.num_siv++;
4752
4753 if (dump_file && (dump_flags & TDF_DETAILS))
4754 fprintf (dump_file, "(analyze_siv_subscript \n");
4755
4756 if (evolution_function_is_constant_p (chrec_a)
4757 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4758 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4759 overlaps_a, overlaps_b, last_conflicts);
4760
4761 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4762 && evolution_function_is_constant_p (chrec_b))
4763 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4764 overlaps_b, overlaps_a, last_conflicts);
4765
4766 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4767 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4768 {
4769 if (!chrec_contains_symbols (chrec_a)
4770 && !chrec_contains_symbols (chrec_b))
4771 {
4772 analyze_subscript_affine_affine (chrec_a, chrec_b,
4773 overlaps_a, overlaps_b,
4774 last_conflicts);
4775
4776 if (CF_NOT_KNOWN_P (*overlaps_a)
4777 || CF_NOT_KNOWN_P (*overlaps_b))
4778 dependence_stats.num_siv_unimplemented++;
4779 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4780 || CF_NO_DEPENDENCE_P (*overlaps_b))
4781 dependence_stats.num_siv_independent++;
4782 else
4783 dependence_stats.num_siv_dependent++;
4784 }
4785 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4786 &chrec_b))
4787 {
4788 analyze_subscript_affine_affine (chrec_a, chrec_b,
4789 overlaps_a, overlaps_b,
4790 last_conflicts);
4791
4792 if (CF_NOT_KNOWN_P (*overlaps_a)
4793 || CF_NOT_KNOWN_P (*overlaps_b))
4794 dependence_stats.num_siv_unimplemented++;
4795 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4796 || CF_NO_DEPENDENCE_P (*overlaps_b))
4797 dependence_stats.num_siv_independent++;
4798 else
4799 dependence_stats.num_siv_dependent++;
4800 }
4801 else
4802 goto siv_subscript_dontknow;
4803 }
4804
4805 else
4806 {
4807 siv_subscript_dontknow:;
4808 if (dump_file && (dump_flags & TDF_DETAILS))
4809 fprintf (dump_file, " siv test failed: unimplemented.\n");
4810 *overlaps_a = conflict_fn_not_known ();
4811 *overlaps_b = conflict_fn_not_known ();
4812 *last_conflicts = chrec_dont_know;
4813 dependence_stats.num_siv_unimplemented++;
4814 }
4815
4816 if (dump_file && (dump_flags & TDF_DETAILS))
4817 fprintf (dump_file, ")\n");
4818 }
4819
4820 /* Returns false if we can prove that the greatest common divisor of the steps
4821 of CHREC does not divide CST, and true otherwise. */
4822
4823 static bool
4824 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4825 {
4826 HOST_WIDE_INT cd = 0, val;
4827 tree step;
4828
4829 if (!tree_fits_shwi_p (cst))
4830 return true;
4831 val = tree_to_shwi (cst);
4832
4833 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4834 {
4835 step = CHREC_RIGHT (chrec);
4836 if (!tree_fits_shwi_p (step))
4837 return true;
4838 cd = gcd (cd, tree_to_shwi (step));
4839 chrec = CHREC_LEFT (chrec);
4840 }
4841
4842 return val % cd == 0;
4843 }
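
/* Standalone toy illustration (C++17 std::gcd; hypothetical names) of
   the test above, on the ssa-chrec-33.c shape quoted in
   analyze_miv_subscript below: all evolution steps of
   {{21, +, 2}_1, +, -2}_2 are multiples of 2, so their gcd (2) cannot
   divide the constant difference 1 and the accesses never meet.

#include <cstdio>
#include <numeric>

int main ()
{
  long steps[] = { 2, -2 };	// chrec steps, one per loop level
  long cst = 21 - 20;		// constant difference of the chrecs
  long cd = 0;
  for (long s : steps)
    cd = std::gcd (cd, s);	// gcd ignores signs
  printf ("gcd of steps = %ld; may it divide %ld? %s\n", cd, cst,
	  cst % cd == 0 ? "yes (dependence possible)" : "no (independent)");
  return 0;
}
*/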
4844
4845 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4846 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4847 functions that describe the relation between the elements accessed
4848 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4849 is verified:
4850
4851 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4852
4853 static void
4854 analyze_miv_subscript (tree chrec_a,
4855 tree chrec_b,
4856 conflict_function **overlaps_a,
4857 conflict_function **overlaps_b,
4858 tree *last_conflicts,
4859 class loop *loop_nest)
4860 {
4861 tree type, difference;
4862
4863 dependence_stats.num_miv++;
4864 if (dump_file && (dump_flags & TDF_DETAILS))
4865 fprintf (dump_file, "(analyze_miv_subscript \n");
4866
4867 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4868 chrec_a = chrec_convert (type, chrec_a, NULL);
4869 chrec_b = chrec_convert (type, chrec_b, NULL);
4870 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4871
4872 if (eq_evolutions_p (chrec_a, chrec_b))
4873 {
4874 /* Access functions are the same: all the elements are accessed
4875 in the same order. */
4876 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4877 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4878 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4879 dependence_stats.num_miv_dependent++;
4880 }
4881
4882 else if (evolution_function_is_constant_p (difference)
4883 && evolution_function_is_affine_multivariate_p (chrec_a,
4884 loop_nest->num)
4885 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4886 {
4887 /* testsuite/.../ssa-chrec-33.c
4888 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4889
4890 The difference is 1, and all the evolution steps are multiples
4891 of 2, consequently there are no overlapping elements. */
4892 *overlaps_a = conflict_fn_no_dependence ();
4893 *overlaps_b = conflict_fn_no_dependence ();
4894 *last_conflicts = integer_zero_node;
4895 dependence_stats.num_miv_independent++;
4896 }
4897
4898 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4899 && !chrec_contains_symbols (chrec_a, loop_nest)
4900 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4901 && !chrec_contains_symbols (chrec_b, loop_nest))
4902 {
4903 /* testsuite/.../ssa-chrec-35.c
4904 {0, +, 1}_2 vs. {0, +, 1}_3
4905 the overlapping elements are respectively located at iterations:
4906 {0, +, 1}_x and {0, +, 1}_x,
4907 in other words, we have the equality:
4908 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4909
4910 Other examples:
4911 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4912 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4913
4914 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4915 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4916 */
4917 analyze_subscript_affine_affine (chrec_a, chrec_b,
4918 overlaps_a, overlaps_b, last_conflicts);
4919
4920 if (CF_NOT_KNOWN_P (*overlaps_a)
4921 || CF_NOT_KNOWN_P (*overlaps_b))
4922 dependence_stats.num_miv_unimplemented++;
4923 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4924 || CF_NO_DEPENDENCE_P (*overlaps_b))
4925 dependence_stats.num_miv_independent++;
4926 else
4927 dependence_stats.num_miv_dependent++;
4928 }
4929
4930 else
4931 {
4932 /* When the analysis is too difficult, answer "don't know". */
4933 if (dump_file && (dump_flags & TDF_DETAILS))
4934 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4935
4936 *overlaps_a = conflict_fn_not_known ();
4937 *overlaps_b = conflict_fn_not_known ();
4938 *last_conflicts = chrec_dont_know;
4939 dependence_stats.num_miv_unimplemented++;
4940 }
4941
4942 if (dump_file && (dump_flags & TDF_DETAILS))
4943 fprintf (dump_file, ")\n");
4944 }
4945
4946 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4947 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4948 OVERLAP_ITERATIONS_B are initialized with two functions that
4949 describe the iterations that contain conflicting elements.
4950
4951 Remark: For an integer k >= 0, the following equality is true:
4952
4953 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4954 */
4955
4956 static void
4957 analyze_overlapping_iterations (tree chrec_a,
4958 tree chrec_b,
4959 conflict_function **overlap_iterations_a,
4960 conflict_function **overlap_iterations_b,
4961 tree *last_conflicts, class loop *loop_nest)
4962 {
4963 unsigned int lnn = loop_nest->num;
4964
4965 dependence_stats.num_subscript_tests++;
4966
4967 if (dump_file && (dump_flags & TDF_DETAILS))
4968 {
4969 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4970 fprintf (dump_file, " (chrec_a = ");
4971 print_generic_expr (dump_file, chrec_a);
4972 fprintf (dump_file, ")\n (chrec_b = ");
4973 print_generic_expr (dump_file, chrec_b);
4974 fprintf (dump_file, ")\n");
4975 }
4976
4977 if (chrec_a == NULL_TREE
4978 || chrec_b == NULL_TREE
4979 || chrec_contains_undetermined (chrec_a)
4980 || chrec_contains_undetermined (chrec_b))
4981 {
4982 dependence_stats.num_subscript_undetermined++;
4983
4984 *overlap_iterations_a = conflict_fn_not_known ();
4985 *overlap_iterations_b = conflict_fn_not_known ();
4986 }
4987
4988 /* If they are the same chrec, and are affine, they overlap
4989 on every iteration. */
4990 else if (eq_evolutions_p (chrec_a, chrec_b)
4991 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4992 || operand_equal_p (chrec_a, chrec_b, 0)))
4993 {
4994 dependence_stats.num_same_subscript_function++;
4995 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4996 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4997 *last_conflicts = chrec_dont_know;
4998 }
4999
5000 /* If they aren't the same, and aren't affine, we can't do anything
5001 yet. */
5002 else if ((chrec_contains_symbols (chrec_a)
5003 || chrec_contains_symbols (chrec_b))
5004 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5005 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5006 {
5007 dependence_stats.num_subscript_undetermined++;
5008 *overlap_iterations_a = conflict_fn_not_known ();
5009 *overlap_iterations_b = conflict_fn_not_known ();
5010 }
5011
5012 else if (ziv_subscript_p (chrec_a, chrec_b))
5013 analyze_ziv_subscript (chrec_a, chrec_b,
5014 overlap_iterations_a, overlap_iterations_b,
5015 last_conflicts);
5016
5017 else if (siv_subscript_p (chrec_a, chrec_b))
5018 analyze_siv_subscript (chrec_a, chrec_b,
5019 overlap_iterations_a, overlap_iterations_b,
5020 last_conflicts, lnn);
5021
5022 else
5023 analyze_miv_subscript (chrec_a, chrec_b,
5024 overlap_iterations_a, overlap_iterations_b,
5025 last_conflicts, loop_nest);
5026
5027 if (dump_file && (dump_flags & TDF_DETAILS))
5028 {
5029 fprintf (dump_file, " (overlap_iterations_a = ");
5030 dump_conflict_function (dump_file, *overlap_iterations_a);
5031 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5032 dump_conflict_function (dump_file, *overlap_iterations_b);
5033 fprintf (dump_file, "))\n");
5034 }
5035 }
5036
5037 /* Helper function for uniquely inserting distance vectors. */
5038
5039 static void
5040 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5041 {
5042 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5043 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5044 return;
5045
5046 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5047 }
5048
5049 /* Helper function for uniquely inserting direction vectors. */
5050
5051 static void
5052 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5053 {
5054 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5055 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5056 return;
5057
5058 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5059 }
5060
5061 /* Add a distance of 1 for all the loops outer than INDEX. If we
5062 haven't yet determined a distance for this outer loop, push a new
5063 distance vector composed of the previous distance, and a distance
5064 of 1 for this outer loop. Example:
5065
5066 | loop_1
5067 | loop_2
5068 | A[10]
5069 | endloop_2
5070 | endloop_1
5071
5072 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5073 save (0, 1), then we have to save (1, 0). */
5074
5075 static void
5076 add_outer_distances (struct data_dependence_relation *ddr,
5077 lambda_vector dist_v, int index)
5078 {
5079 /* For each outer loop where init_v is not set, the accesses are
5080 in dependence of distance 1 in the loop. */
5081 while (--index >= 0)
5082 {
5083 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5085 save_v[index] = 1;
5086 save_dist_v (ddr, save_v);
5087 }
5088 }
5089
5090 /* Return false when we fail to represent the data dependence as a
5091 distance vector. A_INDEX is the index of the first reference
5092 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5093 second reference. INIT_B is set to true when a component has been
5094 added to the distance vector DIST_V. INDEX_CARRY is then set to
5095 the index in DIST_V that carries the dependence. */
5096
5097 static bool
5098 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5099 unsigned int a_index, unsigned int b_index,
5100 lambda_vector dist_v, bool *init_b,
5101 int *index_carry)
5102 {
5103 unsigned i;
5104 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5105 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5106
5107 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5108 {
5109 tree access_fn_a, access_fn_b;
5110 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5111
5112 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5113 {
5114 non_affine_dependence_relation (ddr);
5115 return false;
5116 }
5117
5118 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5119 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5120
5121 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5122 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5123 {
5124 HOST_WIDE_INT dist;
5125 int index;
5126 int var_a = CHREC_VARIABLE (access_fn_a);
5127 int var_b = CHREC_VARIABLE (access_fn_b);
5128
5129 if (var_a != var_b
5130 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5131 {
5132 non_affine_dependence_relation (ddr);
5133 return false;
5134 }
5135
5136 /* When data references are collected in a loop while data
5137 dependences are analyzed in a loop nest nested in that loop, we
5138 may have more access functions than loops. Skip the access
5139 functions of loops not in the loop nest.
5140
5141 See PR89725 for more information. */
5142 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5143 continue;
5144
5145 dist = int_cst_value (SUB_DISTANCE (subscript));
5146 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5147 *index_carry = MIN (index, *index_carry);
5148
5149 /* This is the subscript coupling test. If we have already
5150 recorded a distance for this loop (a distance coming from
5151 another subscript), it should be the same. For example,
5152 in the following code, there is no dependence:
5153
5154 | loop i = 0, N, 1
5155 | T[i+1][i] = ...
5156 | ... = T[i][i]
5157 | endloop
5158 */
5159 if (init_v[index] != 0 && dist_v[index] != dist)
5160 {
5161 finalize_ddr_dependent (ddr, chrec_known);
5162 return false;
5163 }
5164
5165 dist_v[index] = dist;
5166 init_v[index] = 1;
5167 *init_b = true;
5168 }
5169 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5170 {
5171 /* This can be for example an affine vs. constant dependence
5172 (T[i] vs. T[3]) that is not an affine dependence and is
5173 not representable as a distance vector. */
5174 non_affine_dependence_relation (ddr);
5175 return false;
5176 }
5177 else
5178 *init_b = true;
5179 }
5180
5181 return true;
5182 }
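
/* Standalone toy run (hypothetical names, equal steps assumed) of the
   subscript coupling test above on the comment's example
   T[i+1][i] vs. T[i][i]: each subscript pair proposes a distance in
   loop i, and if two coupled subscripts propose different distances no
   single iteration distance satisfies both, so there is no dependence.

#include <cstdio>

int main ()
{
  // With equal steps the per-subscript distance is the difference of
  // the initial values: {1, +, 1} vs. {0, +, 1} and {0, +, 1} vs.
  // {0, +, 1}.
  long dist_subscript_0 = 1 - 0;	// T[i+1][...] vs. T[i][...]
  long dist_subscript_1 = 0 - 0;	// T[...][i] vs. T[...][i]

  if (dist_subscript_0 != dist_subscript_1)
    printf ("coupled subscripts disagree (%ld vs. %ld): no dependence\n",
	    dist_subscript_0, dist_subscript_1);
  else
    printf ("distance vector (%ld)\n", dist_subscript_0);
  return 0;
}
*/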
5183
5184 /* Return true when the DDR contains only access functions that are
5185 invariant with respect to loop number LNUM. */
5186
5187 static bool
5188 invariant_access_functions (const struct data_dependence_relation *ddr,
5189 int lnum)
5190 {
5191 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5192 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5193 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5194 return false;
5195
5196 return true;
5197 }
5198
5199 /* Helper function for the case where DDR_A and DDR_B are the same
5200 multivariate access function with a constant step. For an example
5201 see pr34635-1.c. */
5202
5203 static void
5204 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5205 {
5206 int x_1, x_2;
5207 tree c_1 = CHREC_LEFT (c_2);
5208 tree c_0 = CHREC_LEFT (c_1);
5209 lambda_vector dist_v;
5210 HOST_WIDE_INT v1, v2, cd;
5211
5212 /* Polynomials with more than 2 variables are not handled yet. When
5213 the evolution steps are parameters, it is not possible to
5214 represent the dependence using classical distance vectors. */
5215 if (TREE_CODE (c_0) != INTEGER_CST
5216 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5217 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5218 {
5219 DDR_AFFINE_P (ddr) = false;
5220 return;
5221 }
5222
5223 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5224 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5225
5226 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5227 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5228 v1 = int_cst_value (CHREC_RIGHT (c_1));
5229 v2 = int_cst_value (CHREC_RIGHT (c_2));
5230 cd = gcd (v1, v2);
5231 v1 /= cd;
5232 v2 /= cd;
5233
5234 if (v2 < 0)
5235 {
5236 v2 = -v2;
5237 v1 = -v1;
5238 }
5239
5240 dist_v[x_1] = v2;
5241 dist_v[x_2] = -v1;
5242 save_dist_v (ddr, dist_v);
5243
5244 add_outer_distances (ddr, dist_v, x_1);
5245 }
5246
5247 /* Helper function for the case where DDR_A and DDR_B have the same
5248 access functions. */
5249
5250 static void
5251 add_other_self_distances (struct data_dependence_relation *ddr)
5252 {
5253 lambda_vector dist_v;
5254 unsigned i;
5255 int index_carry = DDR_NB_LOOPS (ddr);
5256 subscript *sub;
5257 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5258
5259 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5260 {
5261 tree access_fun = SUB_ACCESS_FN (sub, 0);
5262
5263 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5264 {
5265 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5266 {
5267 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5268 {
5269 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5270 return;
5271 }
5272
5273 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5274
5275 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5276 add_multivariate_self_dist (ddr, access_fun);
5277 else
5278 /* The evolution step is not constant: it varies in
5279 the outer loop, so this cannot be represented by a
5280 distance vector. For example in pr34635.c the
5281 evolution is {0, +, {0, +, 4}_1}_2. */
5282 DDR_AFFINE_P (ddr) = false;
5283
5284 return;
5285 }
5286
5287 /* When data references are collected in a loop while data
5288 dependences are analyzed in a loop nest nested inside that loop,
5289 there are more access functions than loops. Skip access functions
5290 of loops that are not in the loop nest.
5291
5292 See PR89725 for more information. */
5293 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5294 loop))
5295 continue;
5296
5297 index_carry = MIN (index_carry,
5298 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5299 DDR_LOOP_NEST (ddr)));
5300 }
5301 }
5302
5303 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5304 add_outer_distances (ddr, dist_v, index_carry);
5305 }
5306
5307 static void
5308 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5309 {
5310 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5311
5312 dist_v[0] = 1;
5313 save_dist_v (ddr, dist_v);
5314 }
5315
5316 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5317 is the case for example when access functions are the same and
5318 equal to a constant, as in:
5319
5320 | loop_1
5321 | A[3] = ...
5322 | ... = A[3]
5323 | endloop_1
5324
5325 in which case the distance vectors are (0) and (1). */
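/* The (1) vector is the loop-carried case: the A[3] written at
iteration i is read again at iteration i + 1. */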
5326
5327 static void
5328 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5329 {
5330 unsigned i, j;
5331
5332 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5333 {
5334 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5335 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5336 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5337
5338 for (j = 0; j < ca->n; j++)
5339 if (affine_function_zero_p (ca->fns[j]))
5340 {
5341 insert_innermost_unit_dist_vector (ddr);
5342 return;
5343 }
5344
5345 for (j = 0; j < cb->n; j++)
5346 if (affine_function_zero_p (cb->fns[j]))
5347 {
5348 insert_innermost_unit_dist_vector (ddr);
5349 return;
5350 }
5351 }
5352 }
5353
5354 /* Return true when the DDR contains two data references that have the
5355 same access functions. */
5356
5357 static inline bool
5358 same_access_functions (const struct data_dependence_relation *ddr)
5359 {
5360 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5361 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5362 SUB_ACCESS_FN (sub, 1)))
5363 return false;
5364
5365 return true;
5366 }
5367
5368 /* Compute the classic per loop distance vector. DDR is the data
5369 dependence relation to build a vector from. Return false when we
5370 fail to represent the data dependence as a distance vector. */
5371
5372 static bool
5373 build_classic_dist_vector (struct data_dependence_relation *ddr,
5374 class loop *loop_nest)
5375 {
5376 bool init_b = false;
5377 int index_carry = DDR_NB_LOOPS (ddr);
5378 lambda_vector dist_v;
5379
5380 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5381 return false;
5382
5383 if (same_access_functions (ddr))
5384 {
5385 /* Save the 0 vector. */
5386 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5387 save_dist_v (ddr, dist_v);
5388
5389 if (invariant_access_functions (ddr, loop_nest->num))
5390 add_distance_for_zero_overlaps (ddr);
5391
5392 if (DDR_NB_LOOPS (ddr) > 1)
5393 add_other_self_distances (ddr);
5394
5395 return true;
5396 }
5397
5398 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5399 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5400 return false;
5401
5402 /* Save the distance vector if we initialized one. */
5403 if (init_b)
5404 {
5405 /* Verify a basic constraint: classic distance vectors should
5406 always be lexicographically positive.
5407
5408 Data references are collected in the order of execution of
5409 the program, thus for the following loop
5410
5411 | for (i = 1; i < 100; i++)
5412 | for (j = 1; j < 100; j++)
5413 | {
5414 | t = T[j+1][i-1]; // A
5415 | T[j][i] = t + 2; // B
5416 | }
5417
5418 references are collected in execution order: A then B. The
5419 data dependence tests are performed in the same order, so we
5420 are looking at the distance separating the elements accessed
5421 by A from the elements later accessed by B. But in this
5422 example, the distance returned by test_dep (A, B) is
5423 lexicographically negative (-1, 1), which means that access A
5424 occurs later than B with respect to the outer loop, i.e. we
5425 are actually looking upwind. In this case we solve
5426 test_dep (B, A), looking downwind for the lexicographically
5427 positive solution, which returns the distance vector
5428 (1, -1). */
5429 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5430 {
5431 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5432 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5433 return false;
5434 compute_subscript_distance (ddr);
5435 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5436 &index_carry))
5437 return false;
5438 save_dist_v (ddr, save_v);
5439 DDR_REVERSED_P (ddr) = true;
5440
5441 /* In this case there is a dependence forward for all the
5442 outer loops:
5443
5444 | for (k = 1; k < 100; k++)
5445 | for (i = 1; i < 100; i++)
5446 | for (j = 1; j < 100; j++)
5447 | {
5448 | t = T[j+1][i-1]; // A
5449 | T[j][i] = t + 2; // B
5450 | }
5451
5452 the vectors are:
5453 (0, 1, -1)
5454 (1, 1, -1)
5455 (1, -1, 1)
5456 */
5457 if (DDR_NB_LOOPS (ddr) > 1)
5458 {
5459 add_outer_distances (ddr, save_v, index_carry);
5460 add_outer_distances (ddr, dist_v, index_carry);
5461 }
5462 }
5463 else
5464 {
5465 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5466 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5467
5468 if (DDR_NB_LOOPS (ddr) > 1)
5469 {
5470 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5471
5472 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5473 return false;
5474 compute_subscript_distance (ddr);
5475 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5476 &index_carry))
5477 return false;
5478
5479 save_dist_v (ddr, save_v);
5480 add_outer_distances (ddr, dist_v, index_carry);
5481 add_outer_distances (ddr, opposite_v, index_carry);
5482 }
5483 else
5484 save_dist_v (ddr, save_v);
5485 }
5486 }
5487 else
5488 {
5489 /* There is a distance of 1 on all the outer loops: Example:
5490 there is a dependence of distance 1 on loop_1 for the array A.
5491
5492 | loop_1
5493 | A[5] = ...
5494 | endloop
5495 */
5496 add_outer_distances (ddr, dist_v,
5497 lambda_vector_first_nz (dist_v,
5498 DDR_NB_LOOPS (ddr), 0));
5499 }
5500
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5502 {
5503 unsigned i;
5504
5505 fprintf (dump_file, "(build_classic_dist_vector\n");
5506 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5507 {
5508 fprintf (dump_file, " dist_vector = (");
5509 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5510 DDR_NB_LOOPS (ddr));
5511 fprintf (dump_file, " )\n");
5512 }
5513 fprintf (dump_file, ")\n");
5514 }
5515
5516 return true;
5517 }
5518
5519 /* Return the direction for a given distance.
5520 FIXME: Computing dir this way is suboptimal, since dir can catch
5521 cases that dist is unable to represent. */
5522
5523 static inline enum data_dependence_direction
5524 dir_from_dist (int dist)
5525 {
5526 if (dist > 0)
5527 return dir_positive;
5528 else if (dist < 0)
5529 return dir_negative;
5530 else
5531 return dir_equal;
5532 }
5533
5534 /* Compute the classic per loop direction vector. DDR is the data
5535 dependence relation to build a vector from. */
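/* For instance, a stored distance vector (1, -1) yields the
direction vector (dir_positive, dir_negative). */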
5536
5537 static void
5538 build_classic_dir_vector (struct data_dependence_relation *ddr)
5539 {
5540 unsigned i, j;
5541 lambda_vector dist_v;
5542
5543 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5544 {
5545 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5546
5547 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5548 dir_v[j] = dir_from_dist (dist_v[j]);
5549
5550 save_dir_v (ddr, dir_v);
5551 }
5552 }
5553
5554 /* Helper function. Returns true when there is a dependence between the
5555 data references. A_INDEX is the index of the first reference (0 for
5556 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5557
5558 static bool
5559 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5560 unsigned int a_index, unsigned int b_index,
5561 class loop *loop_nest)
5562 {
5563 unsigned int i;
5564 tree last_conflicts;
5565 struct subscript *subscript;
5566 tree res = NULL_TREE;
5567
5568 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5569 {
5570 conflict_function *overlaps_a, *overlaps_b;
5571
5572 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5573 SUB_ACCESS_FN (subscript, b_index),
5574 &overlaps_a, &overlaps_b,
5575 &last_conflicts, loop_nest);
5576
5577 if (SUB_CONFLICTS_IN_A (subscript))
5578 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5579 if (SUB_CONFLICTS_IN_B (subscript))
5580 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5581
5582 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5583 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5584 SUB_LAST_CONFLICT (subscript) = last_conflicts;
5585
5586 /* If there is any undetermined conflict function we have to
5587 give a conservative answer in case we cannot prove that
5588 no dependence exists when analyzing another subscript. */
5589 if (CF_NOT_KNOWN_P (overlaps_a)
5590 || CF_NOT_KNOWN_P (overlaps_b))
5591 {
5592 res = chrec_dont_know;
5593 continue;
5594 }
5595
5596 /* When there is a subscript with no dependence we can stop. */
5597 else if (CF_NO_DEPENDENCE_P (overlaps_a)
5598 || CF_NO_DEPENDENCE_P (overlaps_b))
5599 {
5600 res = chrec_known;
5601 break;
5602 }
5603 }
5604
5605 if (res == NULL_TREE)
5606 return true;
5607
5608 if (res == chrec_known)
5609 dependence_stats.num_dependence_independent++;
5610 else
5611 dependence_stats.num_dependence_undetermined++;
5612 finalize_ddr_dependent (ddr, res);
5613 return false;
5614 }
5615
5616 /* Compute the conflicting iterations in LOOP_NEST and initialize DDR. */
5617
5618 static void
5619 subscript_dependence_tester (struct data_dependence_relation *ddr,
5620 class loop *loop_nest)
5621 {
5622 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5623 dependence_stats.num_dependence_dependent++;
5624
5625 compute_subscript_distance (ddr);
5626 if (build_classic_dist_vector (ddr, loop_nest))
5627 build_classic_dir_vector (ddr);
5628 }
5629
5630 /* Returns true when all the access functions of A are affine or
5631 constant with respect to LOOP_NEST. */
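/* For example, the access functions of A[i + 1][3] are affine or
constant, whereas a subscript such as i * i is neither, making the
predicate return false. */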
5632
5633 static bool
5634 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5635 const class loop *loop_nest)
5636 {
5637 vec<tree> fns = DR_ACCESS_FNS (a);
5638 for (tree t : fns)
5639 if (!evolution_function_is_invariant_p (t, loop_nest->num)
5640 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5641 return false;
5642
5643 return true;
5644 }
5645
5646 /* Compute the affine dependence relation between the two data
5647 references of DDR with respect to LOOP_NEST. CHREC_KNOWN represents
5648 the independence between two accesses, while CHREC_DONT_KNOW
5649 represents the unknown relation.
5650
5651 Note that it is possible to stop the computation of the dependence
5652 relation the first time we detect a CHREC_KNOWN element for a given
5653 subscript. */
5654
5655 void
5656 compute_affine_dependence (struct data_dependence_relation *ddr,
5657 class loop *loop_nest)
5658 {
5659 struct data_reference *dra = DDR_A (ddr);
5660 struct data_reference *drb = DDR_B (ddr);
5661
5662 if (dump_file && (dump_flags & TDF_DETAILS))
5663 {
5664 fprintf (dump_file, "(compute_affine_dependence\n");
5665 fprintf (dump_file, " ref_a: ");
5666 print_generic_expr (dump_file, DR_REF (dra));
5667 fprintf (dump_file, ", stmt_a: ");
5668 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5669 fprintf (dump_file, " ref_b: ");
5670 print_generic_expr (dump_file, DR_REF (drb));
5671 fprintf (dump_file, ", stmt_b: ");
5672 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5673 }
5674
5675 /* Analyze only when the dependence relation is not yet known. */
5676 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5677 {
5678 dependence_stats.num_dependence_tests++;
5679
5680 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5681 && access_functions_are_affine_or_constant_p (drb, loop_nest))
5682 subscript_dependence_tester (ddr, loop_nest);
5683
5684 /* As a last case, if the dependence cannot be determined, or if
5685 the dependence is considered too difficult to determine, answer
5686 "don't know". */
5687 else
5688 {
5689 dependence_stats.num_dependence_undetermined++;
5690
5691 if (dump_file && (dump_flags & TDF_DETAILS))
5692 {
5693 fprintf (dump_file, "Data ref a:\n");
5694 dump_data_reference (dump_file, dra);
5695 fprintf (dump_file, "Data ref b:\n");
5696 dump_data_reference (dump_file, drb);
5697 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5698 }
5699 finalize_ddr_dependent (ddr, chrec_dont_know);
5700 }
5701 }
5702
5703 if (dump_file && (dump_flags & TDF_DETAILS))
5704 {
5705 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5706 fprintf (dump_file, ") -> no dependence\n");
5707 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5708 fprintf (dump_file, ") -> dependence analysis failed\n");
5709 else
5710 fprintf (dump_file, ")\n");
5711 }
5712 }
5713
5714 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5715 the data references in DATAREFS, in the LOOP_NEST. When
5716 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5717 relations. Return true when successful, i.e. when the number of
5718 data references is small enough to be handled. */
5719
5720 bool
5721 compute_all_dependences (const vec<data_reference_p> &datarefs,
5722 vec<ddr_p> *dependence_relations,
5723 const vec<loop_p> &loop_nest,
5724 bool compute_self_and_rr)
5725 {
5726 struct data_dependence_relation *ddr;
5727 struct data_reference *a, *b;
5728 unsigned int i, j;
5729
5730 if ((int) datarefs.length ()
5731 > param_loop_max_datarefs_for_datadeps)
5732 {
5733 struct data_dependence_relation *ddr;
5734
5735 /* Insert a single relation into dependence_relations:
5736 chrec_dont_know. */
5737 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5738 dependence_relations->safe_push (ddr);
5739 return false;
5740 }
5741
5742 FOR_EACH_VEC_ELT (datarefs, i, a)
5743 for (j = i + 1; datarefs.iterate (j, &b); j++)
5744 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5745 {
5746 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5747 dependence_relations->safe_push (ddr);
5748 if (loop_nest.exists ())
5749 compute_affine_dependence (ddr, loop_nest[0]);
5750 }
5751
5752 if (compute_self_and_rr)
5753 FOR_EACH_VEC_ELT (datarefs, i, a)
5754 {
5755 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5756 dependence_relations->safe_push (ddr);
5757 if (loop_nest.exists ())
5758 compute_affine_dependence (ddr, loop_nest[0]);
5759 }
5760
5761 return true;
5762 }
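/* A typical driver sequence for the routines above looks as follows
(a sketch only, with error handling elided; all names are as
declared in this file):

| auto_vec<loop_p, 3> nest;
| auto_vec<data_reference_p> datarefs;
| auto_vec<ddr_p> ddrs;
| if (find_loop_nest (loop, &nest)
|     && find_data_references_in_loop (loop, &datarefs) != chrec_dont_know)
|   compute_all_dependences (datarefs, &ddrs, nest, false);
| free_dependence_relations (ddrs);
| free_data_refs (datarefs);

compute_data_dependences_for_loop below wraps this kind of
sequence. */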
5763
5764 /* Describes a location of a memory reference. */
5765
5766 struct data_ref_loc
5767 {
5768 /* The memory reference. */
5769 tree ref;
5770
5771 /* True if the memory reference is read. */
5772 bool is_read;
5773
5774 /* True if the data reference is conditional within the containing
5775 statement, i.e. if it might not occur even when the statement
5776 is executed and runs to completion. */
5777 bool is_conditional_in_stmt;
5778 };
5779
5780
5781 /* Stores the locations of memory references in STMT to REFERENCES. Returns
5782 true if STMT clobbers memory, false otherwise. */
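/* For example, for the assignment "A[i] = B[i] + c" two locations
are recorded: B[i] as a read and A[i] as a write. */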
5783
5784 static bool
5785 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5786 {
5787 bool clobbers_memory = false;
5788 data_ref_loc ref;
5789 tree op0, op1;
5790 enum gimple_code stmt_code = gimple_code (stmt);
5791
5792 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5793 As we cannot model data references for accesses that are not
5794 spelled out, give up if such accesses may occur. */
5795 if (stmt_code == GIMPLE_CALL
5796 && !(gimple_call_flags (stmt) & ECF_CONST))
5797 {
5798 /* Allow IFN_GOMP_SIMD_LANE in its own loop. */
5799 if (gimple_call_internal_p (stmt))
5800 switch (gimple_call_internal_fn (stmt))
5801 {
5802 case IFN_GOMP_SIMD_LANE:
5803 {
5804 class loop *loop = gimple_bb (stmt)->loop_father;
5805 tree uid = gimple_call_arg (stmt, 0);
5806 gcc_assert (TREE_CODE (uid) == SSA_NAME);
5807 if (loop == NULL
5808 || loop->simduid != SSA_NAME_VAR (uid))
5809 clobbers_memory = true;
5810 break;
5811 }
5812 case IFN_MASK_LOAD:
5813 case IFN_MASK_STORE:
5814 break;
5815 default:
5816 clobbers_memory = true;
5817 break;
5818 }
5819 else
5820 clobbers_memory = true;
5821 }
5822 else if (stmt_code == GIMPLE_ASM
5823 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5824 || gimple_vuse (stmt)))
5825 clobbers_memory = true;
5826
5827 if (!gimple_vuse (stmt))
5828 return clobbers_memory;
5829
5830 if (stmt_code == GIMPLE_ASSIGN)
5831 {
5832 tree base;
5833 op0 = gimple_assign_lhs (stmt);
5834 op1 = gimple_assign_rhs1 (stmt);
5835
5836 if (DECL_P (op1)
5837 || (REFERENCE_CLASS_P (op1)
5838 && (base = get_base_address (op1))
5839 && TREE_CODE (base) != SSA_NAME
5840 && !is_gimple_min_invariant (base)))
5841 {
5842 ref.ref = op1;
5843 ref.is_read = true;
5844 ref.is_conditional_in_stmt = false;
5845 references->safe_push (ref);
5846 }
5847 }
5848 else if (stmt_code == GIMPLE_CALL)
5849 {
5850 unsigned i, n;
5851 tree ptr, type;
5852 unsigned int align;
5853
5854 ref.is_read = false;
5855 if (gimple_call_internal_p (stmt))
5856 switch (gimple_call_internal_fn (stmt))
5857 {
5858 case IFN_MASK_LOAD:
5859 if (gimple_call_lhs (stmt) == NULL_TREE)
5860 break;
5861 ref.is_read = true;
5862 /* FALLTHRU */
5863 case IFN_MASK_STORE:
5864 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5865 align = tree_to_shwi (gimple_call_arg (stmt, 1));
5866 if (ref.is_read)
5867 type = TREE_TYPE (gimple_call_lhs (stmt));
5868 else
5869 type = TREE_TYPE (gimple_call_arg (stmt, 3));
5870 if (TYPE_ALIGN (type) != align)
5871 type = build_aligned_type (type, align);
5872 ref.is_conditional_in_stmt = true;
5873 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5874 ptr);
5875 references->safe_push (ref);
5876 return false;
5877 default:
5878 break;
5879 }
5880
5881 op0 = gimple_call_lhs (stmt);
5882 n = gimple_call_num_args (stmt);
5883 for (i = 0; i < n; i++)
5884 {
5885 op1 = gimple_call_arg (stmt, i);
5886
5887 if (DECL_P (op1)
5888 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5889 {
5890 ref.ref = op1;
5891 ref.is_read = true;
5892 ref.is_conditional_in_stmt = false;
5893 references->safe_push (ref);
5894 }
5895 }
5896 }
5897 else
5898 return clobbers_memory;
5899
5900 if (op0
5901 && (DECL_P (op0)
5902 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5903 {
5904 ref.ref = op0;
5905 ref.is_read = false;
5906 ref.is_conditional_in_stmt = false;
5907 references->safe_push (ref);
5908 }
5909 return clobbers_memory;
5910 }
5911
5912
5913 /* Returns true if the loop-nest has any data reference. */
5914
5915 bool
5916 loop_nest_has_data_refs (loop_p loop)
5917 {
5918 basic_block *bbs = get_loop_body (loop);
5919 auto_vec<data_ref_loc, 3> references;
5920
5921 for (unsigned i = 0; i < loop->num_nodes; i++)
5922 {
5923 basic_block bb = bbs[i];
5924 gimple_stmt_iterator bsi;
5925
5926 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5927 {
5928 gimple *stmt = gsi_stmt (bsi);
5929 get_references_in_stmt (stmt, &references);
5930 if (references.length ())
5931 {
5932 free (bbs);
5933 return true;
5934 }
5935 }
5936 }
5937 free (bbs);
5938 return false;
5939 }
5940
5941 /* Stores the data references in STMT to DATAREFS. Returns failure if
5942 there is an unanalyzable reference, success otherwise. NEST is the
5943 outermost loop of the loop nest in which the references should be analyzed. */
5944
5945 opt_result
5946 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5947 vec<data_reference_p> *datarefs)
5948 {
5949 auto_vec<data_ref_loc, 2> references;
5950 data_reference_p dr;
5951
5952 if (get_references_in_stmt (stmt, &references))
5953 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5954 stmt);
5955
5956 for (const data_ref_loc &ref : references)
5957 {
5958 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5959 loop_containing_stmt (stmt), ref.ref,
5960 stmt, ref.is_read, ref.is_conditional_in_stmt);
5961 gcc_assert (dr != NULL);
5962 datarefs->safe_push (dr);
5963 }
5964
5965 return opt_result::success ();
5966 }
5967
5968 /* Stores the data references in STMT to DATAREFS. If there is an
5969 unanalyzable reference, returns false, otherwise returns true.
5970 NEST is the outermost loop of the loop nest in which the references
5971 should be instantiated, LOOP is the loop in which the references
5972 should be analyzed. */
5973
5974 bool
5975 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5976 vec<data_reference_p> *datarefs)
5977 {
5978 auto_vec<data_ref_loc, 2> references;
5979 bool ret = true;
5980 data_reference_p dr;
5981
5982 if (get_references_in_stmt (stmt, &references))
5983 return false;
5984
5985 for (const data_ref_loc &ref : references)
5986 {
5987 dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5988 ref.is_conditional_in_stmt);
5989 gcc_assert (dr != NULL);
5990 datarefs->safe_push (dr);
5991 }
5992
5993 return ret;
5994 }
5995
5996 /* Search the data references in basic block BB of LOOP, and record
5997 the information into DATAREFS. Returns chrec_dont_know when failing
5998 to analyze a difficult case, returns NULL_TREE otherwise. */
5999
6000 tree
6001 find_data_references_in_bb (class loop *loop, basic_block bb,
6002 vec<data_reference_p> *datarefs)
6003 {
6004 gimple_stmt_iterator bsi;
6005
6006 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6007 {
6008 gimple *stmt = gsi_stmt (bsi);
6009
6010 if (!find_data_references_in_stmt (loop, stmt, datarefs))
6011 {
6012 struct data_reference *res;
6013 res = XCNEW (struct data_reference);
6014 datarefs->safe_push (res);
6015
6016 return chrec_dont_know;
6017 }
6018 }
6019
6020 return NULL_TREE;
6021 }
6022
6023 /* Search the data references in LOOP, and record the information into
6024 DATAREFS. Returns chrec_dont_know when failing to analyze a
6025 difficult case, returns NULL_TREE otherwise.
6026
6027 TODO: This function should be made smarter so that it can handle
6028 address arithmetic as if it were an array access, etc. */
6029
6030 tree
6031 find_data_references_in_loop (class loop *loop,
6032 vec<data_reference_p> *datarefs)
6033 {
6034 basic_block bb, *bbs;
6035 unsigned int i;
6036
6037 bbs = get_loop_body_in_dom_order (loop);
6038
6039 for (i = 0; i < loop->num_nodes; i++)
6040 {
6041 bb = bbs[i];
6042
6043 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6044 {
6045 free (bbs);
6046 return chrec_dont_know;
6047 }
6048 }
6049 free (bbs);
6050
6051 return NULL_TREE;
6052 }
6053
6054 /* Return the alignment in bytes that DRB is guaranteed to have at all
6055 times. */
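/* For example, with a 16-byte base alignment and
base_misalignment + init = 12, the power-of-two factor of the
misalignment is 12 & -12 = 4, so only 4-byte alignment can be
guaranteed before the OFFSET and STEP caps below. */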
6056
6057 unsigned int
6058 dr_alignment (innermost_loop_behavior *drb)
6059 {
6060 /* Get the alignment of BASE_ADDRESS + INIT. */
6061 unsigned int alignment = drb->base_alignment;
6062 unsigned int misalignment = (drb->base_misalignment
6063 + TREE_INT_CST_LOW (drb->init));
6064 if (misalignment != 0)
6065 alignment = MIN (alignment, misalignment & -misalignment);
6066
6067 /* Cap it to the alignment of OFFSET. */
6068 if (!integer_zerop (drb->offset))
6069 alignment = MIN (alignment, drb->offset_alignment);
6070
6071 /* Cap it to the alignment of STEP. */
6072 if (!integer_zerop (drb->step))
6073 alignment = MIN (alignment, drb->step_alignment);
6074
6075 return alignment;
6076 }
6077
6078 /* If BASE is a pointer-typed SSA name, try to find the object that it
6079 is based on. Return this object X on success and store the alignment
6080 in bytes of BASE - &X in *ALIGNMENT_OUT. */
6081
6082 static tree
6083 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6084 {
6085 if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6086 return NULL_TREE;
6087
6088 gimple *def = SSA_NAME_DEF_STMT (base);
6089 base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6090
6091 /* Peel chrecs and record the minimum alignment preserved by
6092 all steps. */
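/* E.g. for {P, +, 8}_1 only P's alignment modulo 8 survives across
iterations, so ALIGNMENT is capped at 8 when peeling that chrec. */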
6093 unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6094 while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6095 {
6096 unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6097 alignment = MIN (alignment, step_alignment);
6098 base = CHREC_LEFT (base);
6099 }
6100
6101 /* Punt if the expression is too complicated to handle. */
6102 if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6103 return NULL_TREE;
6104
6105 /* The only useful cases are those for which a dereference folds to something
6106 other than an INDIRECT_REF. */
6107 tree ref_type = TREE_TYPE (TREE_TYPE (base));
6108 tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6109 if (!ref)
6110 return NULL_TREE;
6111
6112 /* Analyze the base to which the steps we peeled were applied. */
6113 poly_int64 bitsize, bitpos, bytepos;
6114 machine_mode mode;
6115 int unsignedp, reversep, volatilep;
6116 tree offset;
6117 base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6118 &unsignedp, &reversep, &volatilep);
6119 if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6120 return NULL_TREE;
6121
6122 /* Restrict the alignment to that guaranteed by the offsets. */
6123 unsigned int bytepos_alignment = known_alignment (bytepos);
6124 if (bytepos_alignment != 0)
6125 alignment = MIN (alignment, bytepos_alignment);
6126 if (offset)
6127 {
6128 unsigned int offset_alignment = highest_pow2_factor (offset);
6129 alignment = MIN (alignment, offset_alignment);
6130 }
6131
6132 *alignment_out = alignment;
6133 return base;
6134 }
6135
6136 /* Return the object whose alignment would need to be changed in order
6137 to increase the alignment of ADDR. Store the maximum achievable
6138 alignment in *MAX_ALIGNMENT. */
6139
6140 tree
6141 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6142 {
6143 tree base = get_base_for_alignment_1 (addr, max_alignment);
6144 if (base)
6145 return base;
6146
6147 if (TREE_CODE (addr) == ADDR_EXPR)
6148 addr = TREE_OPERAND (addr, 0);
6149 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6150 return addr;
6151 }
6152
6153 /* Recursive helper function. */
6154
6155 static bool
6156 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6157 {
6158 /* Inner loops of the nest should not contain siblings. Example:
6159 when there are two consecutive loops,
6160
6161 | loop_0
6162 | loop_1
6163 | A[{0, +, 1}_1]
6164 | endloop_1
6165 | loop_2
6166 | A[{0, +, 1}_2]
6167 | endloop_2
6168 | endloop_0
6169
6170 the dependence relation cannot be captured by the distance
6171 abstraction. */
6172 if (loop->next)
6173 return false;
6174
6175 loop_nest->safe_push (loop);
6176 if (loop->inner)
6177 return find_loop_nest_1 (loop->inner, loop_nest);
6178 return true;
6179 }
6180
6181 /* Return false when the LOOP is not well nested. Otherwise return
6182 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6183 contain the loops from the outermost to the innermost, as they will
6184 appear in the classic distance vector. */
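/* For a perfectly nested loop_0 containing loop_1 containing loop_2,
LOOP_NEST receives (loop_0, loop_1, loop_2). */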
6185
6186 bool
6187 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6188 {
6189 loop_nest->safe_push (loop);
6190 if (loop->inner)
6191 return find_loop_nest_1 (loop->inner, loop_nest);
6192 return true;
6193 }
6194
6195 /* Returns true when the data dependences have been computed, false otherwise.
6196 Given a loop nest LOOP, the following vectors are returned:
6197 DATAREFS is initialized to all the data references contained in this loop,
6198 DEPENDENCE_RELATIONS contains the relations between the data references.
6199 Compute read-read and self relations if
6200 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6201
6202 bool
6203 compute_data_dependences_for_loop (class loop *loop,
6204 bool compute_self_and_read_read_dependences,
6205 vec<loop_p> *loop_nest,
6206 vec<data_reference_p> *datarefs,
6207 vec<ddr_p> *dependence_relations)
6208 {
6209 bool res = true;
6210
6211 memset (&dependence_stats, 0, sizeof (dependence_stats));
6212
6213 /* If the loop nest is not well formed, or one of the data references
6214 is not computable, give up without spending time to compute other
6215 dependences. */
6216 if (!loop
6217 || !find_loop_nest (loop, loop_nest)
6218 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6219 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6220 compute_self_and_read_read_dependences))
6221 res = false;
6222
6223 if (dump_file && (dump_flags & TDF_STATS))
6224 {
6225 fprintf (dump_file, "Dependence tester statistics:\n");
6226
6227 fprintf (dump_file, "Number of dependence tests: %d\n",
6228 dependence_stats.num_dependence_tests);
6229 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6230 dependence_stats.num_dependence_dependent);
6231 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6232 dependence_stats.num_dependence_independent);
6233 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6234 dependence_stats.num_dependence_undetermined);
6235
6236 fprintf (dump_file, "Number of subscript tests: %d\n",
6237 dependence_stats.num_subscript_tests);
6238 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6239 dependence_stats.num_subscript_undetermined);
6240 fprintf (dump_file, "Number of same subscript function: %d\n",
6241 dependence_stats.num_same_subscript_function);
6242
6243 fprintf (dump_file, "Number of ziv tests: %d\n",
6244 dependence_stats.num_ziv);
6245 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6246 dependence_stats.num_ziv_dependent);
6247 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6248 dependence_stats.num_ziv_independent);
6249 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6250 dependence_stats.num_ziv_unimplemented);
6251
6252 fprintf (dump_file, "Number of siv tests: %d\n",
6253 dependence_stats.num_siv);
6254 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6255 dependence_stats.num_siv_dependent);
6256 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6257 dependence_stats.num_siv_independent);
6258 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6259 dependence_stats.num_siv_unimplemented);
6260
6261 fprintf (dump_file, "Number of miv tests: %d\n",
6262 dependence_stats.num_miv);
6263 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6264 dependence_stats.num_miv_dependent);
6265 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6266 dependence_stats.num_miv_independent);
6267 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6268 dependence_stats.num_miv_unimplemented);
6269 }
6270
6271 return res;
6272 }
6273
6274 /* Free the memory used by a data dependence relation DDR. */
6275
6276 void
6277 free_dependence_relation (struct data_dependence_relation *ddr)
6278 {
6279 if (ddr == NULL)
6280 return;
6281
6282 if (DDR_SUBSCRIPTS (ddr).exists ())
6283 free_subscripts (DDR_SUBSCRIPTS (ddr));
6284 DDR_DIST_VECTS (ddr).release ();
6285 DDR_DIR_VECTS (ddr).release ();
6286
6287 free (ddr);
6288 }
6289
6290 /* Free the memory used by the data dependence relations from
6291 DEPENDENCE_RELATIONS. */
6292
6293 void
6294 free_dependence_relations (vec<ddr_p>& dependence_relations)
6295 {
6296 for (data_dependence_relation *ddr : dependence_relations)
6297 if (ddr)
6298 free_dependence_relation (ddr);
6299
6300 dependence_relations.release ();
6301 }
6302
6303 /* Free the memory used by the data references from DATAREFS. */
6304
6305 void
6306 free_data_refs (vec<data_reference_p>& datarefs)
6307 {
6308 for (data_reference *dr : datarefs)
6309 free_data_ref (dr);
6310 datarefs.release ();
6311 }
6312
6313 /* Common routine implementing both dr_direction_indicator and
6314 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6315 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6316 Return the step as the indicator otherwise. */
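/* For example, if DR_STEP is "n * 4" and n is known to lie in the
range [1, 100], the scaling by 4 cannot overflow, so the unscaled n
is tested instead; its minimum is >= USEFUL_MIN, and the indicator
collapses to the constant ssize_int (USEFUL_MIN). */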
6317
6318 static tree
6319 dr_step_indicator (struct data_reference *dr, int useful_min)
6320 {
6321 tree step = DR_STEP (dr);
6322 if (!step)
6323 return NULL_TREE;
6324 STRIP_NOPS (step);
6325 /* Look for cases where the step is scaled by a positive constant
6326 integer, which will often be the access size. If the multiplication
6327 doesn't change the sign (due to overflow effects) then we can
6328 test the unscaled value instead. */
6329 if (TREE_CODE (step) == MULT_EXPR
6330 && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6331 && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6332 {
6333 tree factor = TREE_OPERAND (step, 1);
6334 step = TREE_OPERAND (step, 0);
6335
6336 /* Strip widening and truncating conversions as well as nops. */
6337 if (CONVERT_EXPR_P (step)
6338 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6339 step = TREE_OPERAND (step, 0);
6340 tree type = TREE_TYPE (step);
6341
6342 /* Get the range of step values that would not cause overflow. */
6343 widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6344 / wi::to_widest (factor));
6345 widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6346 / wi::to_widest (factor));
6347
6348 /* Get the range of values that the unconverted step actually has. */
6349 wide_int step_min, step_max;
6350 value_range vr;
6351 if (TREE_CODE (step) != SSA_NAME
6352 || !get_range_query (cfun)->range_of_expr (vr, step)
6353 || vr.kind () != VR_RANGE)
6354 {
6355 step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6356 step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6357 }
6358 else
6359 {
6360 step_min = vr.lower_bound ();
6361 step_max = vr.upper_bound ();
6362 }
6363
6364 /* Check whether the unconverted step has an acceptable range. */
6365 signop sgn = TYPE_SIGN (type);
6366 if (wi::les_p (minv, widest_int::from (step_min, sgn))
6367 && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6368 {
6369 if (wi::ge_p (step_min, useful_min, sgn))
6370 return ssize_int (useful_min);
6371 else if (wi::lt_p (step_max, 0, sgn))
6372 return ssize_int (-1);
6373 else
6374 return fold_convert (ssizetype, step);
6375 }
6376 }
6377 return DR_STEP (dr);
6378 }
6379
6380 /* Return a value that is negative iff DR has a negative step. */
6381
6382 tree
6383 dr_direction_indicator (struct data_reference *dr)
6384 {
6385 return dr_step_indicator (dr, 0);
6386 }
6387
6388 /* Return a value that is zero iff DR has a zero step. */
6389
6390 tree
6391 dr_zero_step_indicator (struct data_reference *dr)
6392 {
6393 return dr_step_indicator (dr, 1);
6394 }
6395
6396 /* Return true if DR is known to have a nonnegative (but possibly zero)
6397 step. */
6398
6399 bool
6400 dr_known_forward_stride_p (struct data_reference *dr)
6401 {
6402 tree indicator = dr_direction_indicator (dr);
6403 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6404 fold_convert (ssizetype, indicator),
6405 ssize_int (0));
6406 return neg_step_val && integer_zerop (neg_step_val);
6407 }