]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-data-ref.c
* gcc-interface/Makefile.in (Cygwin/Mingw): Fix previous change.
[thirdparty/gcc.git] / gcc / tree-data-ref.c
CommitLineData
56cf8686 1/* Data references and dependences detectors.
23a5b65a 2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
0ff4040e 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
56cf8686
SP
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
56cf8686
SP
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
56cf8686
SP
20
21/* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
b8698a0f
L
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
56cf8686
SP
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
b8698a0f 30
56cf8686 31 The goals of this analysis are:
b8698a0f 32
56cf8686
SP
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
b8698a0f 36
56cf8686
SP
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
b8698a0f 39
56cf8686
SP
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
b8698a0f
L
45
46 - to define a knowledge base for storing the data dependence
56cf8686 47 information,
b8698a0f 48
56cf8686 49 - to define an interface to access this data.
b8698a0f
L
50
51
56cf8686 52 Definitions:
b8698a0f 53
56cf8686
SP
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
b8698a0f 60 solutions are integer constants, for example the equation
56cf8686
SP
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
b8698a0f 63
56cf8686 64 References:
b8698a0f 65
56cf8686
SP
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
b8698a0f
L
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
56cf8686
SP
71 by Utpal Banerjee.
72
b8698a0f 73
56cf8686
SP
74*/
75
76#include "config.h"
77#include "system.h"
78#include "coretypes.h"
cf2d1b38 79#include "tree.h"
d8a2d370 80#include "expr.h"
cf835838 81#include "gimple-pretty-print.h"
60393bbc
AM
82#include "predict.h"
83#include "vec.h"
84#include "hashtab.h"
85#include "hash-set.h"
86#include "machmode.h"
87#include "tm.h"
88#include "hard-reg-set.h"
89#include "input.h"
90#include "function.h"
91#include "dominance.h"
92#include "cfg.h"
2fb9a547
AM
93#include "basic-block.h"
94#include "tree-ssa-alias.h"
95#include "internal-fn.h"
96#include "gimple-expr.h"
97#include "is-a.h"
442b4905 98#include "gimple.h"
5be5c238 99#include "gimple-iterator.h"
e28030cf 100#include "tree-ssa-loop-niter.h"
442b4905 101#include "tree-ssa-loop.h"
7a300452 102#include "tree-ssa.h"
56cf8686 103#include "cfgloop.h"
56cf8686
SP
104#include "tree-data-ref.h"
105#include "tree-scalar-evolution.h"
7ee2468b 106#include "dumpfile.h"
946e1bc7 107#include "langhooks.h"
02f5d6c5 108#include "tree-affine.h"
3881dee9 109#include "params.h"
56cf8686 110
0ff4040e
SP
111static struct datadep_stats
112{
113 int num_dependence_tests;
114 int num_dependence_dependent;
115 int num_dependence_independent;
116 int num_dependence_undetermined;
117
118 int num_subscript_tests;
119 int num_subscript_undetermined;
120 int num_same_subscript_function;
121
122 int num_ziv;
123 int num_ziv_independent;
124 int num_ziv_dependent;
125 int num_ziv_unimplemented;
126
127 int num_siv;
128 int num_siv_independent;
129 int num_siv_dependent;
130 int num_siv_unimplemented;
131
132 int num_miv;
133 int num_miv_independent;
134 int num_miv_dependent;
135 int num_miv_unimplemented;
136} dependence_stats;
137
ba42e045
SP
138static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
139 struct data_reference *,
da9a21f4
SP
140 struct data_reference *,
141 struct loop *);
56cf8686
SP
142/* Returns true iff A divides B. */
143
b8698a0f 144static inline bool
ed7a4b4b 145tree_fold_divides_p (const_tree a, const_tree b)
56cf8686 146{
b73a6056
RS
147 gcc_assert (TREE_CODE (a) == INTEGER_CST);
148 gcc_assert (TREE_CODE (b) == INTEGER_CST);
d35936ab 149 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
56cf8686
SP
150}
151
86df10e3
SP
152/* Returns true iff A divides B. */
153
b8698a0f 154static inline bool
86df10e3
SP
155int_divides_p (int a, int b)
156{
157 return ((b % a) == 0);
56cf8686
SP
158}
159
160\f
161
b8698a0f 162/* Dump into FILE all the data references from DATAREFS. */
56cf8686 163
aeb83f09 164static void
9771b263 165dump_data_references (FILE *file, vec<data_reference_p> datarefs)
56cf8686
SP
166{
167 unsigned int i;
ebf78a47
SP
168 struct data_reference *dr;
169
9771b263 170 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebf78a47 171 dump_data_reference (file, dr);
56cf8686
SP
172}
173
7b3b6ae4
LC
174/* Unified dump into FILE all the data references from DATAREFS. */
175
176DEBUG_FUNCTION void
177debug (vec<data_reference_p> &ref)
178{
179 dump_data_references (stderr, ref);
180}
181
182DEBUG_FUNCTION void
183debug (vec<data_reference_p> *ptr)
184{
185 if (ptr)
186 debug (*ptr);
187 else
188 fprintf (stderr, "<nil>\n");
189}
190
191
b8698a0f 192/* Dump into STDERR all the data references from DATAREFS. */
a37d995a 193
24e47c76 194DEBUG_FUNCTION void
9771b263 195debug_data_references (vec<data_reference_p> datarefs)
a37d995a
SP
196{
197 dump_data_references (stderr, datarefs);
198}
199
a37d995a
SP
200/* Print to STDERR the data_reference DR. */
201
24e47c76 202DEBUG_FUNCTION void
a37d995a
SP
203debug_data_reference (struct data_reference *dr)
204{
205 dump_data_reference (stderr, dr);
206}
207
56cf8686
SP
208/* Dump function for a DATA_REFERENCE structure. */
209
b8698a0f
L
210void
211dump_data_reference (FILE *outf,
56cf8686
SP
212 struct data_reference *dr)
213{
214 unsigned int i;
b8698a0f 215
28c5db57
SP
216 fprintf (outf, "#(Data Ref: \n");
217 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
218 fprintf (outf, "# stmt: ");
726a989a 219 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
03922af3 220 fprintf (outf, "# ref: ");
56cf8686 221 print_generic_stmt (outf, DR_REF (dr), 0);
03922af3 222 fprintf (outf, "# base_object: ");
86a07404 223 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
b8698a0f 224
56cf8686
SP
225 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
226 {
03922af3 227 fprintf (outf, "# Access function %d: ", i);
56cf8686
SP
228 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
229 }
03922af3 230 fprintf (outf, "#)\n");
56cf8686
SP
231}
232
7b3b6ae4
LC
233/* Unified dump function for a DATA_REFERENCE structure. */
234
235DEBUG_FUNCTION void
236debug (data_reference &ref)
237{
238 dump_data_reference (stderr, &ref);
239}
240
241DEBUG_FUNCTION void
242debug (data_reference *ptr)
243{
244 if (ptr)
245 debug (*ptr);
246 else
247 fprintf (stderr, "<nil>\n");
248}
249
250
d93817c4
ZD
251/* Dumps the affine function described by FN to the file OUTF. */
252
253static void
254dump_affine_function (FILE *outf, affine_fn fn)
255{
256 unsigned i;
257 tree coef;
258
9771b263
DN
259 print_generic_expr (outf, fn[0], TDF_SLIM);
260 for (i = 1; fn.iterate (i, &coef); i++)
d93817c4
ZD
261 {
262 fprintf (outf, " + ");
263 print_generic_expr (outf, coef, TDF_SLIM);
264 fprintf (outf, " * x_%u", i);
265 }
266}
267
268/* Dumps the conflict function CF to the file OUTF. */
269
270static void
271dump_conflict_function (FILE *outf, conflict_function *cf)
272{
273 unsigned i;
274
275 if (cf->n == NO_DEPENDENCE)
bcf1ef00 276 fprintf (outf, "no dependence");
d93817c4 277 else if (cf->n == NOT_KNOWN)
bcf1ef00 278 fprintf (outf, "not known");
d93817c4
ZD
279 else
280 {
281 for (i = 0; i < cf->n; i++)
282 {
bcf1ef00
RB
283 if (i != 0)
284 fprintf (outf, " ");
d93817c4
ZD
285 fprintf (outf, "[");
286 dump_affine_function (outf, cf->fns[i]);
bcf1ef00 287 fprintf (outf, "]");
d93817c4
ZD
288 }
289 }
290}
291
86df10e3
SP
292/* Dump function for a SUBSCRIPT structure. */
293
aeb83f09 294static void
86df10e3
SP
295dump_subscript (FILE *outf, struct subscript *subscript)
296{
d93817c4 297 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
86df10e3
SP
298
299 fprintf (outf, "\n (subscript \n");
300 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
d93817c4
ZD
301 dump_conflict_function (outf, cf);
302 if (CF_NONTRIVIAL_P (cf))
86df10e3
SP
303 {
304 tree last_iteration = SUB_LAST_CONFLICT (subscript);
bcf1ef00
RB
305 fprintf (outf, "\n last_conflict: ");
306 print_generic_expr (outf, last_iteration, 0);
86df10e3 307 }
b8698a0f 308
d93817c4 309 cf = SUB_CONFLICTS_IN_B (subscript);
bcf1ef00 310 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
d93817c4
ZD
311 dump_conflict_function (outf, cf);
312 if (CF_NONTRIVIAL_P (cf))
86df10e3
SP
313 {
314 tree last_iteration = SUB_LAST_CONFLICT (subscript);
bcf1ef00
RB
315 fprintf (outf, "\n last_conflict: ");
316 print_generic_expr (outf, last_iteration, 0);
86df10e3
SP
317 }
318
bcf1ef00
RB
319 fprintf (outf, "\n (Subscript distance: ");
320 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
321 fprintf (outf, " ))\n");
86df10e3
SP
322}
323
0ff4040e
SP
324/* Print the classic direction vector DIRV to OUTF. */
325
aeb83f09 326static void
0ff4040e
SP
327print_direction_vector (FILE *outf,
328 lambda_vector dirv,
329 int length)
330{
331 int eq;
332
333 for (eq = 0; eq < length; eq++)
334 {
81f40b79
ILT
335 enum data_dependence_direction dir = ((enum data_dependence_direction)
336 dirv[eq]);
0ff4040e
SP
337
338 switch (dir)
339 {
340 case dir_positive:
341 fprintf (outf, " +");
342 break;
343 case dir_negative:
344 fprintf (outf, " -");
345 break;
346 case dir_equal:
347 fprintf (outf, " =");
348 break;
349 case dir_positive_or_equal:
350 fprintf (outf, " +=");
351 break;
352 case dir_positive_or_negative:
353 fprintf (outf, " +-");
354 break;
355 case dir_negative_or_equal:
356 fprintf (outf, " -=");
357 break;
358 case dir_star:
359 fprintf (outf, " *");
360 break;
361 default:
362 fprintf (outf, "indep");
363 break;
364 }
365 }
366 fprintf (outf, "\n");
367}
368
ba42e045
SP
369/* Print a vector of direction vectors. */
370
aeb83f09 371static void
9771b263 372print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
ba42e045
SP
373 int length)
374{
375 unsigned j;
376 lambda_vector v;
377
9771b263 378 FOR_EACH_VEC_ELT (dir_vects, j, v)
ba42e045
SP
379 print_direction_vector (outf, v, length);
380}
381
b305e3da
SP
382/* Print out a vector VEC of length N to OUTFILE. */
383
384static inline void
385print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
386{
387 int i;
388
389 for (i = 0; i < n; i++)
390 fprintf (outfile, "%3d ", vector[i]);
391 fprintf (outfile, "\n");
392}
393
ba42e045
SP
394/* Print a vector of distance vectors. */
395
aeb83f09 396static void
9771b263 397print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
aeb83f09 398 int length)
ba42e045
SP
399{
400 unsigned j;
401 lambda_vector v;
402
9771b263 403 FOR_EACH_VEC_ELT (dist_vects, j, v)
ba42e045
SP
404 print_lambda_vector (outf, v, length);
405}
406
56cf8686
SP
407/* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408
aeb83f09 409static void
b8698a0f 410dump_data_dependence_relation (FILE *outf,
56cf8686
SP
411 struct data_dependence_relation *ddr)
412{
56cf8686 413 struct data_reference *dra, *drb;
86df10e3 414
86df10e3 415 fprintf (outf, "(Data Dep: \n");
dea61d92 416
ed2024ba
MJ
417 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
418 {
b61b1f17
MM
419 if (ddr)
420 {
421 dra = DDR_A (ddr);
422 drb = DDR_B (ddr);
423 if (dra)
424 dump_data_reference (outf, dra);
425 else
426 fprintf (outf, " (nil)\n");
427 if (drb)
428 dump_data_reference (outf, drb);
429 else
430 fprintf (outf, " (nil)\n");
431 }
ed2024ba
MJ
432 fprintf (outf, " (don't know)\n)\n");
433 return;
434 }
435
436 dra = DDR_A (ddr);
437 drb = DDR_B (ddr);
dea61d92
SP
438 dump_data_reference (outf, dra);
439 dump_data_reference (outf, drb);
440
ed2024ba 441 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
56cf8686 442 fprintf (outf, " (no dependence)\n");
b8698a0f 443
86df10e3 444 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
56cf8686 445 {
86df10e3 446 unsigned int i;
ba42e045 447 struct loop *loopi;
304afda6 448
56cf8686
SP
449 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
450 {
56cf8686
SP
451 fprintf (outf, " access_fn_A: ");
452 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
453 fprintf (outf, " access_fn_B: ");
454 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
86df10e3 455 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
56cf8686 456 }
304afda6 457
3d8864c0 458 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
ba42e045 459 fprintf (outf, " loop nest: (");
9771b263 460 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
ba42e045
SP
461 fprintf (outf, "%d ", loopi->num);
462 fprintf (outf, ")\n");
463
304afda6 464 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
56cf8686 465 {
304afda6
SP
466 fprintf (outf, " distance_vector: ");
467 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
ba42e045 468 DDR_NB_LOOPS (ddr));
86df10e3 469 }
304afda6
SP
470
471 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
86df10e3 472 {
304afda6 473 fprintf (outf, " direction_vector: ");
0ff4040e 474 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
ba42e045 475 DDR_NB_LOOPS (ddr));
56cf8686 476 }
56cf8686
SP
477 }
478
479 fprintf (outf, ")\n");
480}
481
aeb83f09 482/* Debug version. */
56cf8686 483
aeb83f09
RG
484DEBUG_FUNCTION void
485debug_data_dependence_relation (struct data_dependence_relation *ddr)
56cf8686 486{
aeb83f09
RG
487 dump_data_dependence_relation (stderr, ddr);
488}
b8698a0f 489
aeb83f09 490/* Dump into FILE all the dependence relations from DDRS. */
b8698a0f 491
aeb83f09
RG
492void
493dump_data_dependence_relations (FILE *file,
9771b263 494 vec<ddr_p> ddrs)
aeb83f09
RG
495{
496 unsigned int i;
497 struct data_dependence_relation *ddr;
b8698a0f 498
9771b263 499 FOR_EACH_VEC_ELT (ddrs, i, ddr)
aeb83f09
RG
500 dump_data_dependence_relation (file, ddr);
501}
b8698a0f 502
7b3b6ae4
LC
503DEBUG_FUNCTION void
504debug (vec<ddr_p> &ref)
505{
506 dump_data_dependence_relations (stderr, ref);
507}
508
509DEBUG_FUNCTION void
510debug (vec<ddr_p> *ptr)
511{
512 if (ptr)
513 debug (*ptr);
514 else
515 fprintf (stderr, "<nil>\n");
516}
517
518
aeb83f09 519/* Dump to STDERR all the dependence relations from DDRS. */
b8698a0f 520
aeb83f09 521DEBUG_FUNCTION void
9771b263 522debug_data_dependence_relations (vec<ddr_p> ddrs)
aeb83f09
RG
523{
524 dump_data_dependence_relations (stderr, ddrs);
56cf8686
SP
525}
526
86df10e3
SP
527/* Dumps the distance and direction vectors in FILE. DDRS contains
528 the dependence relations, and VECT_SIZE is the size of the
529 dependence vectors, or in other words the number of loops in the
530 considered nest. */
531
aeb83f09 532static void
9771b263 533dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
86df10e3 534{
304afda6 535 unsigned int i, j;
ebf78a47
SP
536 struct data_dependence_relation *ddr;
537 lambda_vector v;
86df10e3 538
9771b263 539 FOR_EACH_VEC_ELT (ddrs, i, ddr)
ebf78a47
SP
540 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
541 {
9771b263 542 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
ebf78a47
SP
543 {
544 fprintf (file, "DISTANCE_V (");
545 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
546 fprintf (file, ")\n");
547 }
548
9771b263 549 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
ebf78a47
SP
550 {
551 fprintf (file, "DIRECTION_V (");
552 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
553 fprintf (file, ")\n");
554 }
555 }
304afda6 556
86df10e3
SP
557 fprintf (file, "\n\n");
558}
559
560/* Dumps the data dependence relations DDRS in FILE. */
561
aeb83f09 562static void
9771b263 563dump_ddrs (FILE *file, vec<ddr_p> ddrs)
86df10e3
SP
564{
565 unsigned int i;
ebf78a47
SP
566 struct data_dependence_relation *ddr;
567
9771b263 568 FOR_EACH_VEC_ELT (ddrs, i, ddr)
ebf78a47 569 dump_data_dependence_relation (file, ddr);
86df10e3 570
86df10e3
SP
571 fprintf (file, "\n\n");
572}
573
aeb83f09 574DEBUG_FUNCTION void
9771b263 575debug_ddrs (vec<ddr_p> ddrs)
aeb83f09
RG
576{
577 dump_ddrs (stderr, ddrs);
578}
579
726a989a
RB
580/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
581 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
582 constant of type ssizetype, and returns true. If we cannot do this
583 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
584 is returned. */
86a07404 585
726a989a
RB
586static bool
587split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
588 tree *var, tree *off)
86a07404 589{
3cb960c7
ZD
590 tree var0, var1;
591 tree off0, off1;
726a989a 592 enum tree_code ocode = code;
86a07404 593
726a989a
RB
594 *var = NULL_TREE;
595 *off = NULL_TREE;
86a07404 596
5be014d5 597 switch (code)
86a07404 598 {
3cb960c7
ZD
599 case INTEGER_CST:
600 *var = build_int_cst (type, 0);
726a989a
RB
601 *off = fold_convert (ssizetype, op0);
602 return true;
86a07404 603
5be014d5 604 case POINTER_PLUS_EXPR:
726a989a 605 ocode = PLUS_EXPR;
5be014d5 606 /* FALLTHROUGH */
3cb960c7
ZD
607 case PLUS_EXPR:
608 case MINUS_EXPR:
726a989a
RB
609 split_constant_offset (op0, &var0, &off0);
610 split_constant_offset (op1, &var1, &off1);
611 *var = fold_build2 (code, type, var0, var1);
612 *off = size_binop (ocode, off0, off1);
613 return true;
86a07404 614
86a07404 615 case MULT_EXPR:
726a989a
RB
616 if (TREE_CODE (op1) != INTEGER_CST)
617 return false;
3cb960c7 618
726a989a
RB
619 split_constant_offset (op0, &var0, &off0);
620 *var = fold_build2 (MULT_EXPR, type, var0, op1);
621 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
622 return true;
86a07404 623
3cb960c7
ZD
624 case ADDR_EXPR:
625 {
726a989a 626 tree base, poffset;
3cb960c7 627 HOST_WIDE_INT pbitsize, pbitpos;
ef4bddc2 628 machine_mode pmode;
3cb960c7 629 int punsignedp, pvolatilep;
86a07404 630
da4b6efc 631 op0 = TREE_OPERAND (op0, 0);
726a989a 632 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
b3ecff82 633 &pmode, &punsignedp, &pvolatilep, false);
86a07404 634
3cb960c7 635 if (pbitpos % BITS_PER_UNIT != 0)
726a989a 636 return false;
3cb960c7
ZD
637 base = build_fold_addr_expr (base);
638 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
86a07404 639
3cb960c7
ZD
640 if (poffset)
641 {
642 split_constant_offset (poffset, &poffset, &off1);
643 off0 = size_binop (PLUS_EXPR, off0, off1);
36ad7922 644 if (POINTER_TYPE_P (TREE_TYPE (base)))
5d49b6a7 645 base = fold_build_pointer_plus (base, poffset);
36ad7922
JJ
646 else
647 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
648 fold_convert (TREE_TYPE (base), poffset));
3cb960c7
ZD
649 }
650
6481b879
JJ
651 var0 = fold_convert (type, base);
652
653 /* If variable length types are involved, punt, otherwise casts
654 might be converted into ARRAY_REFs in gimplify_conversion.
655 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
656 possibly no longer appears in current GIMPLE, might resurface.
657 This perhaps could run
1a87cf0c 658 if (CONVERT_EXPR_P (var0))
6481b879
JJ
659 {
660 gimplify_conversion (&var0);
661 // Attempt to fill in any within var0 found ARRAY_REF's
662 // element size from corresponding op embedded ARRAY_REF,
663 // if unsuccessful, just punt.
664 } */
665 while (POINTER_TYPE_P (type))
666 type = TREE_TYPE (type);
667 if (int_size_in_bytes (type) < 0)
726a989a 668 return false;
6481b879
JJ
669
670 *var = var0;
3cb960c7 671 *off = off0;
726a989a 672 return true;
3cb960c7 673 }
86a07404 674
06cb4f79
JS
675 case SSA_NAME:
676 {
726a989a
RB
677 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
678 enum tree_code subcode;
06cb4f79 679
726a989a
RB
680 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
681 return false;
682
683 var0 = gimple_assign_rhs1 (def_stmt);
684 subcode = gimple_assign_rhs_code (def_stmt);
685 var1 = gimple_assign_rhs2 (def_stmt);
686
687 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
06cb4f79 688 }
b61b1f17
MM
689 CASE_CONVERT:
690 {
691 /* We must not introduce undefined overflow, and we must not change the value.
692 Hence we're okay if the inner type doesn't overflow to start with
693 (pointer or signed), the outer type also is an integer or pointer
694 and the outer precision is at least as large as the inner. */
695 tree itype = TREE_TYPE (op0);
696 if ((POINTER_TYPE_P (itype)
697 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
698 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
699 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
700 {
701 split_constant_offset (op0, &var0, off);
702 *var = fold_convert (type, var0);
703 return true;
704 }
705 return false;
706 }
06cb4f79 707
86a07404 708 default:
726a989a 709 return false;
86a07404 710 }
726a989a
RB
711}
712
713/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
714 will be ssizetype. */
715
716void
717split_constant_offset (tree exp, tree *var, tree *off)
718{
719 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
720 enum tree_code code;
86a07404 721
726a989a 722 *var = exp;
3cb960c7 723 *off = ssize_int (0);
726a989a
RB
724 STRIP_NOPS (exp);
725
ffd78b30
RG
726 if (tree_is_chrec (exp)
727 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
726a989a
RB
728 return;
729
730 otype = TREE_TYPE (exp);
731 code = TREE_CODE (exp);
732 extract_ops_from_tree (exp, &code, &op0, &op1);
733 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
734 {
735 *var = fold_convert (type, e);
736 *off = o;
737 }
86a07404
IR
738}
739
3cb960c7
ZD
740/* Returns the address ADDR of an object in a canonical shape (without nop
741 casts, and with type of pointer to the object). */
86a07404
IR
742
743static tree
3cb960c7 744canonicalize_base_object_address (tree addr)
86a07404 745{
bbc8a8dc
ZD
746 tree orig = addr;
747
3cb960c7 748 STRIP_NOPS (addr);
86a07404 749
bbc8a8dc
ZD
750 /* The base address may be obtained by casting from integer, in that case
751 keep the cast. */
752 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
753 return orig;
754
3cb960c7
ZD
755 if (TREE_CODE (addr) != ADDR_EXPR)
756 return addr;
86a07404 757
3cb960c7 758 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
86a07404
IR
759}
760
b8698a0f 761/* Analyzes the behavior of the memory reference DR in the innermost loop or
4e4452b6 762 basic block that contains it. Returns true if analysis succeed or false
a70d6342 763 otherwise. */
86a07404 764
3661e899 765bool
4e4452b6 766dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
86a07404 767{
726a989a 768 gimple stmt = DR_STMT (dr);
3cb960c7
ZD
769 struct loop *loop = loop_containing_stmt (stmt);
770 tree ref = DR_REF (dr);
86a07404 771 HOST_WIDE_INT pbitsize, pbitpos;
3cb960c7 772 tree base, poffset;
ef4bddc2 773 machine_mode pmode;
86a07404 774 int punsignedp, pvolatilep;
3cb960c7
ZD
775 affine_iv base_iv, offset_iv;
776 tree init, dinit, step;
a70d6342 777 bool in_loop = (loop && loop->num);
3cb960c7
ZD
778
779 if (dump_file && (dump_flags & TDF_DETAILS))
780 fprintf (dump_file, "analyze_innermost: ");
86a07404 781
3cb960c7 782 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
b3ecff82 783 &pmode, &punsignedp, &pvolatilep, false);
3cb960c7 784 gcc_assert (base != NULL_TREE);
86a07404 785
3cb960c7 786 if (pbitpos % BITS_PER_UNIT != 0)
86a07404 787 {
3cb960c7
ZD
788 if (dump_file && (dump_flags & TDF_DETAILS))
789 fprintf (dump_file, "failed: bit offset alignment.\n");
3661e899 790 return false;
3cb960c7 791 }
86a07404 792
70f34814
RG
793 if (TREE_CODE (base) == MEM_REF)
794 {
795 if (!integer_zerop (TREE_OPERAND (base, 1)))
796 {
807e902e
KZ
797 offset_int moff = mem_ref_offset (base);
798 tree mofft = wide_int_to_tree (sizetype, moff);
70f34814 799 if (!poffset)
673910d7 800 poffset = mofft;
70f34814 801 else
673910d7 802 poffset = size_binop (PLUS_EXPR, poffset, mofft);
70f34814
RG
803 }
804 base = TREE_OPERAND (base, 0);
805 }
806 else
807 base = build_fold_addr_expr (base);
4e4452b6 808
a70d6342 809 if (in_loop)
3cb960c7 810 {
b8698a0f 811 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
319e6439 812 nest ? true : false))
a70d6342 813 {
4e4452b6
IR
814 if (nest)
815 {
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "failed: evolution of base is not"
818 " affine.\n");
819 return false;
820 }
821 else
822 {
823 base_iv.base = base;
824 base_iv.step = ssize_int (0);
825 base_iv.no_overflow = true;
826 }
a70d6342
IR
827 }
828 }
829 else
830 {
831 base_iv.base = base;
832 base_iv.step = ssize_int (0);
833 base_iv.no_overflow = true;
3cb960c7 834 }
a70d6342 835
24adb18f 836 if (!poffset)
3cb960c7
ZD
837 {
838 offset_iv.base = ssize_int (0);
839 offset_iv.step = ssize_int (0);
840 }
24adb18f 841 else
3cb960c7 842 {
24adb18f
IR
843 if (!in_loop)
844 {
845 offset_iv.base = poffset;
846 offset_iv.step = ssize_int (0);
847 }
848 else if (!simple_iv (loop, loop_containing_stmt (stmt),
319e6439
RG
849 poffset, &offset_iv,
850 nest ? true : false))
24adb18f 851 {
4e4452b6
IR
852 if (nest)
853 {
854 if (dump_file && (dump_flags & TDF_DETAILS))
855 fprintf (dump_file, "failed: evolution of offset is not"
856 " affine.\n");
857 return false;
858 }
859 else
860 {
861 offset_iv.base = poffset;
862 offset_iv.step = ssize_int (0);
863 }
24adb18f 864 }
3cb960c7 865 }
86a07404 866
3cb960c7
ZD
867 init = ssize_int (pbitpos / BITS_PER_UNIT);
868 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
869 init = size_binop (PLUS_EXPR, init, dinit);
870 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
871 init = size_binop (PLUS_EXPR, init, dinit);
86a07404 872
3cb960c7
ZD
873 step = size_binop (PLUS_EXPR,
874 fold_convert (ssizetype, base_iv.step),
875 fold_convert (ssizetype, offset_iv.step));
86a07404 876
3cb960c7 877 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
86a07404 878
3cb960c7
ZD
879 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
880 DR_INIT (dr) = init;
881 DR_STEP (dr) = step;
86a07404 882
3cb960c7 883 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
86a07404 884
3cb960c7
ZD
885 if (dump_file && (dump_flags & TDF_DETAILS))
886 fprintf (dump_file, "success.\n");
3661e899
TB
887
888 return true;
3cb960c7 889}
86a07404 890
3cb960c7 891/* Determines the base object and the list of indices of memory reference
5c640e29 892 DR, analyzed in LOOP and instantiated in loop nest NEST. */
86a07404 893
3cb960c7 894static void
5c640e29 895dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
3cb960c7 896{
6e1aa848 897 vec<tree> access_fns = vNULL;
c4ddde1b 898 tree ref, op;
9fcb758b
RG
899 tree base, off, access_fn;
900 basic_block before_loop;
b8698a0f 901
9fcb758b
RG
902 /* If analyzing a basic-block there are no indices to analyze
903 and thus no access functions. */
02f5d6c5
RG
904 if (!nest)
905 {
9fcb758b 906 DR_BASE_OBJECT (dr) = DR_REF (dr);
9771b263 907 DR_ACCESS_FNS (dr).create (0);
02f5d6c5
RG
908 return;
909 }
910
c4ddde1b 911 ref = DR_REF (dr);
02f5d6c5 912 before_loop = block_before_loop (nest);
b8698a0f 913
9fcb758b
RG
914 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
915 into a two element array with a constant index. The base is
916 then just the immediate underlying object. */
917 if (TREE_CODE (ref) == REALPART_EXPR)
918 {
919 ref = TREE_OPERAND (ref, 0);
9771b263 920 access_fns.safe_push (integer_zero_node);
9fcb758b
RG
921 }
922 else if (TREE_CODE (ref) == IMAGPART_EXPR)
923 {
924 ref = TREE_OPERAND (ref, 0);
9771b263 925 access_fns.safe_push (integer_one_node);
9fcb758b
RG
926 }
927
b8324815 928 /* Analyze access functions of dimensions we know to be independent. */
c4ddde1b 929 while (handled_component_p (ref))
86a07404 930 {
c4ddde1b 931 if (TREE_CODE (ref) == ARRAY_REF)
86a07404 932 {
c4ddde1b 933 op = TREE_OPERAND (ref, 1);
02f5d6c5
RG
934 access_fn = analyze_scalar_evolution (loop, op);
935 access_fn = instantiate_scev (before_loop, loop, access_fn);
9771b263 936 access_fns.safe_push (access_fn);
b8324815 937 }
c4ddde1b
RG
938 else if (TREE_CODE (ref) == COMPONENT_REF
939 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
940 {
941 /* For COMPONENT_REFs of records (but not unions!) use the
942 FIELD_DECL offset as constant access function so we can
943 disambiguate a[i].f1 and a[i].f2. */
944 tree off = component_ref_field_offset (ref);
945 off = size_binop (PLUS_EXPR,
946 size_binop (MULT_EXPR,
947 fold_convert (bitsizetype, off),
948 bitsize_int (BITS_PER_UNIT)),
949 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
9771b263 950 access_fns.safe_push (off);
c4ddde1b
RG
951 }
952 else
953 /* If we have an unhandled component we could not translate
954 to an access function stop analyzing. We have determined
955 our base object in this case. */
956 break;
b8698a0f 957
c4ddde1b 958 ref = TREE_OPERAND (ref, 0);
86a07404
IR
959 }
960
8c330caa
RG
961 /* If the address operand of a MEM_REF base has an evolution in the
962 analyzed nest, add it as an additional independent access-function. */
c4ddde1b 963 if (TREE_CODE (ref) == MEM_REF)
86a07404 964 {
c4ddde1b 965 op = TREE_OPERAND (ref, 0);
3cb960c7 966 access_fn = analyze_scalar_evolution (loop, op);
a213b219 967 access_fn = instantiate_scev (before_loop, loop, access_fn);
8c330caa 968 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
2946bd34 969 {
0a500dd3 970 tree orig_type;
c4ddde1b 971 tree memoff = TREE_OPERAND (ref, 1);
8c330caa 972 base = initial_condition (access_fn);
0a500dd3
RG
973 orig_type = TREE_TYPE (base);
974 STRIP_USELESS_TYPE_CONVERSION (base);
8c330caa 975 split_constant_offset (base, &base, &off);
f65586dc 976 STRIP_USELESS_TYPE_CONVERSION (base);
8c330caa
RG
977 /* Fold the MEM_REF offset into the evolutions initial
978 value to make more bases comparable. */
c4ddde1b 979 if (!integer_zerop (memoff))
8c330caa
RG
980 {
981 off = size_binop (PLUS_EXPR, off,
c4ddde1b
RG
982 fold_convert (ssizetype, memoff));
983 memoff = build_int_cst (TREE_TYPE (memoff), 0);
8c330caa
RG
984 }
985 access_fn = chrec_replace_initial_condition
0a500dd3 986 (access_fn, fold_convert (orig_type, off));
c4ddde1b
RG
987 /* ??? This is still not a suitable base object for
988 dr_may_alias_p - the base object needs to be an
989 access that covers the object as whole. With
990 an evolution in the pointer this cannot be
991 guaranteed.
992 As a band-aid, mark the access so we can special-case
993 it in dr_may_alias_p. */
994 ref = fold_build2_loc (EXPR_LOCATION (ref),
995 MEM_REF, TREE_TYPE (ref),
996 base, memoff);
9771b263 997 access_fns.safe_push (access_fn);
2946bd34 998 }
86a07404 999 }
c4ddde1b
RG
1000 else if (DECL_P (ref))
1001 {
1002 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1003 ref = build2 (MEM_REF, TREE_TYPE (ref),
1004 build_fold_addr_expr (ref),
1005 build_int_cst (reference_alias_ptr_type (ref), 0));
1006 }
86a07404 1007
3cb960c7
ZD
1008 DR_BASE_OBJECT (dr) = ref;
1009 DR_ACCESS_FNS (dr) = access_fns;
86a07404
IR
1010}
1011
3cb960c7 1012/* Extracts the alias analysis information from the memory reference DR. */
86a07404 1013
3cb960c7
ZD
1014static void
1015dr_analyze_alias (struct data_reference *dr)
86a07404 1016{
3cb960c7 1017 tree ref = DR_REF (dr);
5006671f
RG
1018 tree base = get_base_address (ref), addr;
1019
70f34814
RG
1020 if (INDIRECT_REF_P (base)
1021 || TREE_CODE (base) == MEM_REF)
3cb960c7
ZD
1022 {
1023 addr = TREE_OPERAND (base, 0);
1024 if (TREE_CODE (addr) == SSA_NAME)
5006671f 1025 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
3cb960c7 1026 }
3cb960c7 1027}
86a07404 1028
3cb960c7 1029/* Frees data reference DR. */
8fdbc9c6 1030
dea61d92 1031void
8fdbc9c6
ZD
1032free_data_ref (data_reference_p dr)
1033{
9771b263 1034 DR_ACCESS_FNS (dr).release ();
8fdbc9c6
ZD
1035 free (dr);
1036}
86a07404 1037
3cb960c7
ZD
1038/* Analyzes memory reference MEMREF accessed in STMT. The reference
1039 is read if IS_READ is true, write otherwise. Returns the
5c640e29
SP
1040 data_reference description of MEMREF. NEST is the outermost loop
1041 in which the reference should be instantiated, LOOP is the loop in
1042 which the data reference should be analyzed. */
86a07404 1043
5417e022 1044struct data_reference *
5c640e29
SP
1045create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1046 bool is_read)
86a07404 1047{
3cb960c7 1048 struct data_reference *dr;
0ff4040e 1049
3cb960c7 1050 if (dump_file && (dump_flags & TDF_DETAILS))
0ff4040e 1051 {
3cb960c7
ZD
1052 fprintf (dump_file, "Creating dr for ");
1053 print_generic_expr (dump_file, memref, TDF_SLIM);
1054 fprintf (dump_file, "\n");
0ff4040e 1055 }
e2157b49 1056
3cb960c7
ZD
1057 dr = XCNEW (struct data_reference);
1058 DR_STMT (dr) = stmt;
1059 DR_REF (dr) = memref;
1060 DR_IS_READ (dr) = is_read;
86a07404 1061
4e4452b6 1062 dr_analyze_innermost (dr, nest);
5c640e29 1063 dr_analyze_indices (dr, nest, loop);
3cb960c7 1064 dr_analyze_alias (dr);
86a07404
IR
1065
1066 if (dump_file && (dump_flags & TDF_DETAILS))
1067 {
b8324815 1068 unsigned i;
3cb960c7 1069 fprintf (dump_file, "\tbase_address: ");
86a07404
IR
1070 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1071 fprintf (dump_file, "\n\toffset from base address: ");
1072 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1073 fprintf (dump_file, "\n\tconstant offset from base address: ");
1074 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
86a07404
IR
1075 fprintf (dump_file, "\n\tstep: ");
1076 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
3cb960c7
ZD
1077 fprintf (dump_file, "\n\taligned to: ");
1078 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1079 fprintf (dump_file, "\n\tbase_object: ");
1080 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
86a07404 1081 fprintf (dump_file, "\n");
b8324815
RG
1082 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1083 {
1084 fprintf (dump_file, "\tAccess function %d: ", i);
1085 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1086 }
3cb960c7
ZD
1087 }
1088
b8698a0f 1089 return dr;
86a07404
IR
1090}
1091
bfe068c3
IR
1092/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1093 expressions. */
1094static bool
1095dr_equal_offsets_p1 (tree offset1, tree offset2)
1096{
1097 bool res;
1098
1099 STRIP_NOPS (offset1);
1100 STRIP_NOPS (offset2);
1101
1102 if (offset1 == offset2)
1103 return true;
1104
1105 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1106 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1107 return false;
1108
1109 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1110 TREE_OPERAND (offset2, 0));
1111
1112 if (!res || !BINARY_CLASS_P (offset1))
1113 return res;
1114
1115 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1116 TREE_OPERAND (offset2, 1));
1117
1118 return res;
1119}
1120
1121/* Check if DRA and DRB have equal offsets. */
1122bool
1123dr_equal_offsets_p (struct data_reference *dra,
1124 struct data_reference *drb)
1125{
1126 tree offset1, offset2;
1127
1128 offset1 = DR_OFFSET (dra);
1129 offset2 = DR_OFFSET (drb);
1130
1131 return dr_equal_offsets_p1 (offset1, offset2);
1132}
1133
d93817c4
ZD
1134/* Returns true if FNA == FNB. */
1135
1136static bool
1137affine_function_equal_p (affine_fn fna, affine_fn fnb)
1138{
9771b263 1139 unsigned i, n = fna.length ();
86a07404 1140
9771b263 1141 if (n != fnb.length ())
f86289d5 1142 return false;
86df10e3 1143
d93817c4 1144 for (i = 0; i < n; i++)
9771b263 1145 if (!operand_equal_p (fna[i], fnb[i], 0))
d93817c4
ZD
1146 return false;
1147
1148 return true;
1149}
1150
1151/* If all the functions in CF are the same, returns one of them,
1152 otherwise returns NULL. */
1153
1154static affine_fn
1155common_affine_function (conflict_function *cf)
86df10e3 1156{
d93817c4
ZD
1157 unsigned i;
1158 affine_fn comm;
1159
1160 if (!CF_NONTRIVIAL_P (cf))
c3284718 1161 return affine_fn ();
d93817c4
ZD
1162
1163 comm = cf->fns[0];
1164
1165 for (i = 1; i < cf->n; i++)
1166 if (!affine_function_equal_p (comm, cf->fns[i]))
c3284718 1167 return affine_fn ();
d93817c4
ZD
1168
1169 return comm;
1170}
86df10e3 1171
d93817c4
ZD
1172/* Returns the base of the affine function FN. */
1173
1174static tree
1175affine_function_base (affine_fn fn)
1176{
9771b263 1177 return fn[0];
d93817c4
ZD
1178}
1179
1180/* Returns true if FN is a constant. */
1181
1182static bool
1183affine_function_constant_p (affine_fn fn)
1184{
1185 unsigned i;
1186 tree coef;
1187
9771b263 1188 for (i = 1; fn.iterate (i, &coef); i++)
d93817c4 1189 if (!integer_zerop (coef))
e2157b49
SP
1190 return false;
1191
86df10e3
SP
1192 return true;
1193}
1194
1baf2906
SP
1195/* Returns true if FN is the zero constant function. */
1196
1197static bool
1198affine_function_zero_p (affine_fn fn)
1199{
1200 return (integer_zerop (affine_function_base (fn))
1201 && affine_function_constant_p (fn));
1202}
1203
33b30201
SP
1204/* Returns a signed integer type with the largest precision from TA
1205 and TB. */
1206
1207static tree
1208signed_type_for_types (tree ta, tree tb)
1209{
1210 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1211 return signed_type_for (ta);
1212 else
1213 return signed_type_for (tb);
1214}
1215
d93817c4
ZD
1216/* Applies operation OP on affine functions FNA and FNB, and returns the
1217 result. */
1218
1219static affine_fn
1220affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1221{
1222 unsigned i, n, m;
1223 affine_fn ret;
1224 tree coef;
1225
9771b263 1226 if (fnb.length () > fna.length ())
d93817c4 1227 {
9771b263
DN
1228 n = fna.length ();
1229 m = fnb.length ();
d93817c4
ZD
1230 }
1231 else
1232 {
9771b263
DN
1233 n = fnb.length ();
1234 m = fna.length ();
d93817c4
ZD
1235 }
1236
9771b263 1237 ret.create (m);
d93817c4 1238 for (i = 0; i < n; i++)
33b30201 1239 {
9771b263
DN
1240 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1241 TREE_TYPE (fnb[i]));
1242 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
33b30201 1243 }
d93817c4 1244
9771b263
DN
1245 for (; fna.iterate (i, &coef); i++)
1246 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
d93817c4 1247 coef, integer_zero_node));
9771b263
DN
1248 for (; fnb.iterate (i, &coef); i++)
1249 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
d93817c4
ZD
1250 integer_zero_node, coef));
1251
1252 return ret;
1253}
1254
1255/* Returns the sum of affine functions FNA and FNB. */
1256
1257static affine_fn
1258affine_fn_plus (affine_fn fna, affine_fn fnb)
1259{
1260 return affine_fn_op (PLUS_EXPR, fna, fnb);
1261}
1262
1263/* Returns the difference of affine functions FNA and FNB. */
1264
1265static affine_fn
1266affine_fn_minus (affine_fn fna, affine_fn fnb)
1267{
1268 return affine_fn_op (MINUS_EXPR, fna, fnb);
1269}
1270
1271/* Frees affine function FN. */
1272
1273static void
1274affine_fn_free (affine_fn fn)
1275{
9771b263 1276 fn.release ();
d93817c4
ZD
1277}
1278
86df10e3
SP
1279/* Determine for each subscript in the data dependence relation DDR
1280 the distance. */
56cf8686 1281
0ff4040e 1282static void
86df10e3 1283compute_subscript_distance (struct data_dependence_relation *ddr)
56cf8686 1284{
d93817c4
ZD
1285 conflict_function *cf_a, *cf_b;
1286 affine_fn fn_a, fn_b, diff;
1287
56cf8686
SP
1288 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1289 {
1290 unsigned int i;
b8698a0f 1291
56cf8686
SP
1292 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1293 {
56cf8686 1294 struct subscript *subscript;
b8698a0f 1295
56cf8686 1296 subscript = DDR_SUBSCRIPT (ddr, i);
d93817c4
ZD
1297 cf_a = SUB_CONFLICTS_IN_A (subscript);
1298 cf_b = SUB_CONFLICTS_IN_B (subscript);
86df10e3 1299
d93817c4
ZD
1300 fn_a = common_affine_function (cf_a);
1301 fn_b = common_affine_function (cf_b);
9771b263 1302 if (!fn_a.exists () || !fn_b.exists ())
86df10e3 1303 {
d93817c4
ZD
1304 SUB_DISTANCE (subscript) = chrec_dont_know;
1305 return;
86df10e3 1306 }
d93817c4 1307 diff = affine_fn_minus (fn_a, fn_b);
b8698a0f 1308
d93817c4
ZD
1309 if (affine_function_constant_p (diff))
1310 SUB_DISTANCE (subscript) = affine_function_base (diff);
56cf8686
SP
1311 else
1312 SUB_DISTANCE (subscript) = chrec_dont_know;
d93817c4
ZD
1313
1314 affine_fn_free (diff);
56cf8686
SP
1315 }
1316 }
1317}
1318
d93817c4
ZD
1319/* Returns the conflict function for "unknown". */
1320
1321static conflict_function *
1322conflict_fn_not_known (void)
1323{
1324 conflict_function *fn = XCNEW (conflict_function);
1325 fn->n = NOT_KNOWN;
1326
1327 return fn;
1328}
1329
1330/* Returns the conflict function for "independent". */
1331
1332static conflict_function *
1333conflict_fn_no_dependence (void)
1334{
1335 conflict_function *fn = XCNEW (conflict_function);
1336 fn->n = NO_DEPENDENCE;
1337
1338 return fn;
1339}
1340
3cb960c7
ZD
1341/* Returns true if the address of OBJ is invariant in LOOP. */
1342
1343static bool
ed7a4b4b 1344object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
3cb960c7
ZD
1345{
1346 while (handled_component_p (obj))
1347 {
1348 if (TREE_CODE (obj) == ARRAY_REF)
1349 {
1350 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1351 need to check the stride and the lower bound of the reference. */
1352 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1353 loop->num)
1354 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1355 loop->num))
1356 return false;
1357 }
1358 else if (TREE_CODE (obj) == COMPONENT_REF)
1359 {
1360 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1361 loop->num))
1362 return false;
1363 }
1364 obj = TREE_OPERAND (obj, 0);
1365 }
1366
70f34814
RG
1367 if (!INDIRECT_REF_P (obj)
1368 && TREE_CODE (obj) != MEM_REF)
3cb960c7
ZD
1369 return true;
1370
1371 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1372 loop->num);
1373}
1374
3cb960c7 1375/* Returns false if we can prove that data references A and B do not alias,
02f5d6c5
RG
1376 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1377 considered. */
3cb960c7 1378
f8bf9252 1379bool
02f5d6c5
RG
1380dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1381 bool loop_nest)
3cb960c7 1382{
7d36e538
RG
1383 tree addr_a = DR_BASE_OBJECT (a);
1384 tree addr_b = DR_BASE_OBJECT (b);
3cb960c7 1385
02f5d6c5
RG
1386 /* If we are not processing a loop nest but scalar code we
1387 do not need to care about possible cross-iteration dependences
1388 and thus can process the full original reference. Do so,
1389 similar to how loop invariant motion applies extra offset-based
1390 disambiguation. */
1391 if (!loop_nest)
1392 {
1393 aff_tree off1, off2;
807e902e 1394 widest_int size1, size2;
02f5d6c5
RG
1395 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1396 get_inner_reference_aff (DR_REF (b), &off2, &size2);
807e902e 1397 aff_combination_scale (&off1, -1);
02f5d6c5
RG
1398 aff_combination_add (&off2, &off1);
1399 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1400 return false;
1401 }
1402
6e2028ff
RB
1403 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1404 do not know the size of the base-object. So we cannot do any
1405 offset/overlap based analysis but have to rely on points-to
1406 information only. */
c4ddde1b 1407 if (TREE_CODE (addr_a) == MEM_REF
6e2028ff 1408 && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)
c4ddde1b 1409 {
6e2028ff
RB
1410 /* For true dependences we can apply TBAA. */
1411 if (flag_strict_aliasing
1412 && DR_IS_WRITE (a) && DR_IS_READ (b)
1413 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1414 get_alias_set (DR_REF (b))))
1415 return false;
1416 if (TREE_CODE (addr_b) == MEM_REF)
c4ddde1b
RG
1417 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1418 TREE_OPERAND (addr_b, 0));
1419 else
1420 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1421 build_fold_addr_expr (addr_b));
1422 }
1423 else if (TREE_CODE (addr_b) == MEM_REF
6e2028ff
RB
1424 && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)
1425 {
1426 /* For true dependences we can apply TBAA. */
1427 if (flag_strict_aliasing
1428 && DR_IS_WRITE (a) && DR_IS_READ (b)
1429 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1430 get_alias_set (DR_REF (b))))
1431 return false;
1432 if (TREE_CODE (addr_a) == MEM_REF)
1433 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1434 TREE_OPERAND (addr_b, 0));
1435 else
1436 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1437 TREE_OPERAND (addr_b, 0));
1438 }
c4ddde1b
RG
1439
1440 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1441 that is being subsetted in the loop nest. */
b0af49c4 1442 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
7d36e538 1443 return refs_output_dependent_p (addr_a, addr_b);
b0af49c4 1444 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
7d36e538
RG
1445 return refs_anti_dependent_p (addr_a, addr_b);
1446 return refs_may_alias_p (addr_a, addr_b);
3cb960c7
ZD
1447}
1448
0ff4040e
SP
1449/* Initialize a data dependence relation between data accesses A and
1450 B. NB_LOOPS is the number of loops surrounding the references: the
1451 size of the classic distance/direction vectors. */
56cf8686 1452
aec7ae7d 1453struct data_dependence_relation *
b8698a0f 1454initialize_data_dependence_relation (struct data_reference *a,
0ff4040e 1455 struct data_reference *b,
9771b263 1456 vec<loop_p> loop_nest)
56cf8686
SP
1457{
1458 struct data_dependence_relation *res;
0ff4040e 1459 unsigned int i;
b8698a0f 1460
5ed6ace5 1461 res = XNEW (struct data_dependence_relation);
56cf8686
SP
1462 DDR_A (res) = a;
1463 DDR_B (res) = b;
9771b263 1464 DDR_LOOP_NEST (res).create (0);
71d5b5e1 1465 DDR_REVERSED_P (res) = false;
9771b263
DN
1466 DDR_SUBSCRIPTS (res).create (0);
1467 DDR_DIR_VECTS (res).create (0);
1468 DDR_DIST_VECTS (res).create (0);
56cf8686 1469
86a07404
IR
1470 if (a == NULL || b == NULL)
1471 {
b8698a0f 1472 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404 1473 return res;
b8698a0f 1474 }
86a07404 1475
3cb960c7 1476 /* If the data references do not alias, then they are independent. */
9771b263 1477 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
86a07404 1478 {
b8698a0f 1479 DDR_ARE_DEPENDENT (res) = chrec_known;
86a07404
IR
1480 return res;
1481 }
56cf8686 1482
fea99a37 1483 /* The case where the references are exactly the same. */
b3924be9
SP
1484 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1485 {
9771b263
DN
1486 if (loop_nest.exists ()
1487 && !object_address_invariant_in_loop_p (loop_nest[0],
1a4571cb
RL
1488 DR_BASE_OBJECT (a)))
1489 {
1490 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1491 return res;
1492 }
b3924be9
SP
1493 DDR_AFFINE_P (res) = true;
1494 DDR_ARE_DEPENDENT (res) = NULL_TREE;
9771b263 1495 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
b3924be9
SP
1496 DDR_LOOP_NEST (res) = loop_nest;
1497 DDR_INNER_LOOP (res) = 0;
1498 DDR_SELF_REFERENCE (res) = true;
1a4571cb
RL
1499 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1500 {
1501 struct subscript *subscript;
1502
1503 subscript = XNEW (struct subscript);
1504 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1505 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1506 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1507 SUB_DISTANCE (subscript) = chrec_dont_know;
9771b263 1508 DDR_SUBSCRIPTS (res).safe_push (subscript);
1a4571cb 1509 }
b3924be9
SP
1510 return res;
1511 }
1512
3cb960c7
ZD
1513 /* If the references do not access the same object, we do not know
1514 whether they alias or not. */
1515 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
56cf8686 1516 {
b8698a0f 1517 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404
IR
1518 return res;
1519 }
0ff4040e 1520
3cb960c7 1521 /* If the base of the object is not invariant in the loop nest, we cannot
0d52bcc1 1522 analyze it. TODO -- in fact, it would suffice to record that there may
c80b4100 1523 be arbitrary dependences in the loops where the base object varies. */
9771b263
DN
1524 if (loop_nest.exists ()
1525 && !object_address_invariant_in_loop_p (loop_nest[0],
a70d6342 1526 DR_BASE_OBJECT (a)))
86a07404 1527 {
b8698a0f 1528 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404
IR
1529 return res;
1530 }
3cb960c7 1531
19368333
RG
1532 /* If the number of dimensions of the access to not agree we can have
1533 a pointer access to a component of the array element type and an
1534 array access while the base-objects are still the same. Punt. */
1535 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1536 {
1537 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1538 return res;
1539 }
3cb960c7 1540
86a07404
IR
1541 DDR_AFFINE_P (res) = true;
1542 DDR_ARE_DEPENDENT (res) = NULL_TREE;
9771b263 1543 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
ba42e045 1544 DDR_LOOP_NEST (res) = loop_nest;
3d8864c0 1545 DDR_INNER_LOOP (res) = 0;
b3924be9 1546 DDR_SELF_REFERENCE (res) = false;
304afda6 1547
86a07404
IR
1548 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1549 {
1550 struct subscript *subscript;
b8698a0f 1551
5ed6ace5 1552 subscript = XNEW (struct subscript);
d93817c4
ZD
1553 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1554 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
86a07404
IR
1555 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1556 SUB_DISTANCE (subscript) = chrec_dont_know;
9771b263 1557 DDR_SUBSCRIPTS (res).safe_push (subscript);
56cf8686 1558 }
ebf78a47 1559
56cf8686
SP
1560 return res;
1561}
1562
d93817c4
ZD
1563/* Frees memory used by the conflict function F. */
1564
1565static void
1566free_conflict_function (conflict_function *f)
1567{
1568 unsigned i;
1569
1570 if (CF_NONTRIVIAL_P (f))
1571 {
1572 for (i = 0; i < f->n; i++)
1573 affine_fn_free (f->fns[i]);
1574 }
1575 free (f);
1576}
1577
1578/* Frees memory used by SUBSCRIPTS. */
1579
1580static void
9771b263 1581free_subscripts (vec<subscript_p> subscripts)
d93817c4
ZD
1582{
1583 unsigned i;
1584 subscript_p s;
1585
9771b263 1586 FOR_EACH_VEC_ELT (subscripts, i, s)
d93817c4
ZD
1587 {
1588 free_conflict_function (s->conflicting_iterations_in_a);
1589 free_conflict_function (s->conflicting_iterations_in_b);
a0044be5 1590 free (s);
d93817c4 1591 }
9771b263 1592 subscripts.release ();
d93817c4
ZD
1593}
1594
56cf8686
SP
1595/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1596 description. */
1597
1598static inline void
b8698a0f 1599finalize_ddr_dependent (struct data_dependence_relation *ddr,
56cf8686
SP
1600 tree chrec)
1601{
b8698a0f 1602 DDR_ARE_DEPENDENT (ddr) = chrec;
d93817c4 1603 free_subscripts (DDR_SUBSCRIPTS (ddr));
9771b263 1604 DDR_SUBSCRIPTS (ddr).create (0);
56cf8686
SP
1605}
1606
86df10e3
SP
1607/* The dependence relation DDR cannot be represented by a distance
1608 vector. */
1609
1610static inline void
1611non_affine_dependence_relation (struct data_dependence_relation *ddr)
1612{
1613 if (dump_file && (dump_flags & TDF_DETAILS))
1614 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1615
1616 DDR_AFFINE_P (ddr) = false;
1617}
1618
56cf8686
SP
1619\f
1620
1621/* This section contains the classic Banerjee tests. */
1622
1623/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1624 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1625
1626static inline bool
ed7a4b4b 1627ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
56cf8686
SP
1628{
1629 return (evolution_function_is_constant_p (chrec_a)
1630 && evolution_function_is_constant_p (chrec_b));
1631}
1632
1633/* Returns true iff CHREC_A and CHREC_B are dependent on an index
1634 variable, i.e., if the SIV (Single Index Variable) test is true. */
1635
1636static bool
ed7a4b4b 1637siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
56cf8686
SP
1638{
1639 if ((evolution_function_is_constant_p (chrec_a)
1640 && evolution_function_is_univariate_p (chrec_b))
1641 || (evolution_function_is_constant_p (chrec_b)
1642 && evolution_function_is_univariate_p (chrec_a)))
1643 return true;
b8698a0f 1644
56cf8686
SP
1645 if (evolution_function_is_univariate_p (chrec_a)
1646 && evolution_function_is_univariate_p (chrec_b))
1647 {
1648 switch (TREE_CODE (chrec_a))
1649 {
1650 case POLYNOMIAL_CHREC:
1651 switch (TREE_CODE (chrec_b))
1652 {
1653 case POLYNOMIAL_CHREC:
1654 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1655 return false;
b8698a0f 1656
56cf8686
SP
1657 default:
1658 return true;
1659 }
b8698a0f 1660
56cf8686
SP
1661 default:
1662 return true;
1663 }
1664 }
b8698a0f 1665
56cf8686
SP
1666 return false;
1667}
1668
d93817c4
ZD
1669/* Creates a conflict function with N dimensions. The affine functions
1670 in each dimension follow. */
1671
1672static conflict_function *
1673conflict_fn (unsigned n, ...)
1674{
1675 unsigned i;
1676 conflict_function *ret = XCNEW (conflict_function);
1677 va_list ap;
1678
b39c6706 1679 gcc_assert (0 < n && n <= MAX_DIM);
c3284718 1680 va_start (ap, n);
b8698a0f 1681
d93817c4
ZD
1682 ret->n = n;
1683 for (i = 0; i < n; i++)
1684 ret->fns[i] = va_arg (ap, affine_fn);
c3284718 1685 va_end (ap);
d93817c4
ZD
1686
1687 return ret;
1688}
1689
1690/* Returns constant affine function with value CST. */
1691
1692static affine_fn
1693affine_fn_cst (tree cst)
1694{
9771b263
DN
1695 affine_fn fn;
1696 fn.create (1);
1697 fn.quick_push (cst);
d93817c4
ZD
1698 return fn;
1699}
1700
1701/* Returns affine function with single variable, CST + COEF * x_DIM. */
1702
1703static affine_fn
1704affine_fn_univar (tree cst, unsigned dim, tree coef)
1705{
9771b263
DN
1706 affine_fn fn;
1707 fn.create (dim + 1);
d93817c4
ZD
1708 unsigned i;
1709
1710 gcc_assert (dim > 0);
9771b263 1711 fn.quick_push (cst);
d93817c4 1712 for (i = 1; i < dim; i++)
9771b263
DN
1713 fn.quick_push (integer_zero_node);
1714 fn.quick_push (coef);
d93817c4
ZD
1715 return fn;
1716}
1717
56cf8686
SP
1718/* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1719 *OVERLAPS_B are initialized to the functions that describe the
1720 relation between the elements accessed twice by CHREC_A and
1721 CHREC_B. For k >= 0, the following property is verified:
1722
1723 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1724
b8698a0f
L
1725static void
1726analyze_ziv_subscript (tree chrec_a,
1727 tree chrec_b,
d93817c4 1728 conflict_function **overlaps_a,
b8698a0f 1729 conflict_function **overlaps_b,
86df10e3 1730 tree *last_conflicts)
56cf8686 1731{
33b30201 1732 tree type, difference;
0ff4040e 1733 dependence_stats.num_ziv++;
b8698a0f 1734
56cf8686
SP
1735 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "(analyze_ziv_subscript \n");
33b30201
SP
1737
1738 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
1739 chrec_a = chrec_convert (type, chrec_a, NULL);
1740 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 1741 difference = chrec_fold_minus (type, chrec_a, chrec_b);
b8698a0f 1742
56cf8686
SP
1743 switch (TREE_CODE (difference))
1744 {
1745 case INTEGER_CST:
1746 if (integer_zerop (difference))
1747 {
1748 /* The difference is equal to zero: the accessed index
1749 overlaps for each iteration in the loop. */
d93817c4
ZD
1750 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1751 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
86df10e3 1752 *last_conflicts = chrec_dont_know;
0ff4040e 1753 dependence_stats.num_ziv_dependent++;
56cf8686
SP
1754 }
1755 else
1756 {
1757 /* The accesses do not overlap. */
d93817c4
ZD
1758 *overlaps_a = conflict_fn_no_dependence ();
1759 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1760 *last_conflicts = integer_zero_node;
0ff4040e 1761 dependence_stats.num_ziv_independent++;
56cf8686
SP
1762 }
1763 break;
b8698a0f 1764
56cf8686 1765 default:
b8698a0f 1766 /* We're not sure whether the indexes overlap. For the moment,
56cf8686 1767 conservatively answer "don't know". */
0ff4040e
SP
1768 if (dump_file && (dump_flags & TDF_DETAILS))
1769 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1770
d93817c4
ZD
1771 *overlaps_a = conflict_fn_not_known ();
1772 *overlaps_b = conflict_fn_not_known ();
86df10e3 1773 *last_conflicts = chrec_dont_know;
0ff4040e 1774 dependence_stats.num_ziv_unimplemented++;
56cf8686
SP
1775 break;
1776 }
b8698a0f 1777
56cf8686
SP
1778 if (dump_file && (dump_flags & TDF_DETAILS))
1779 fprintf (dump_file, ")\n");
1780}
1781
b4a9343c 1782/* Similar to max_stmt_executions_int, but returns the bound as a tree,
4839cb59 1783 and only if it fits to the int type. If this is not the case, or the
b4a9343c 1784 bound on the number of iterations of LOOP could not be derived, returns
4839cb59
ZD
1785 chrec_dont_know. */
1786
1787static tree
b4a9343c 1788max_stmt_executions_tree (struct loop *loop)
4839cb59 1789{
807e902e 1790 widest_int nit;
4839cb59 1791
652c4c71 1792 if (!max_stmt_executions (loop, &nit))
4839cb59
ZD
1793 return chrec_dont_know;
1794
807e902e 1795 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
4839cb59
ZD
1796 return chrec_dont_know;
1797
807e902e 1798 return wide_int_to_tree (unsigned_type_node, nit);
4839cb59
ZD
1799}
1800
5f1fab58
RG
1801/* Determine whether the CHREC is always positive/negative. If the expression
1802 cannot be statically analyzed, return false, otherwise set the answer into
1803 VALUE. */
1804
1805static bool
1806chrec_is_positive (tree chrec, bool *value)
1807{
1808 bool value0, value1, value2;
1809 tree end_value, nb_iter;
1810
1811 switch (TREE_CODE (chrec))
1812 {
1813 case POLYNOMIAL_CHREC:
1814 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1815 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1816 return false;
1817
1818 /* FIXME -- overflows. */
1819 if (value0 == value1)
1820 {
1821 *value = value0;
1822 return true;
1823 }
1824
1825 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1826 and the proof consists in showing that the sign never
1827 changes during the execution of the loop, from 0 to
1828 loop->nb_iterations. */
1829 if (!evolution_function_is_affine_p (chrec))
1830 return false;
1831
1832 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1833 if (chrec_contains_undetermined (nb_iter))
1834 return false;
1835
1836#if 0
1837 /* TODO -- If the test is after the exit, we may decrease the number of
1838 iterations by one. */
1839 if (after_exit)
1840 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1841#endif
1842
1843 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1844
1845 if (!chrec_is_positive (end_value, &value2))
1846 return false;
1847
1848 *value = value0;
1849 return value0 == value1;
1850
1851 case INTEGER_CST:
1852 switch (tree_int_cst_sgn (chrec))
1853 {
1854 case -1:
1855 *value = false;
1856 break;
1857 case 1:
1858 *value = true;
1859 break;
1860 default:
1861 return false;
1862 }
1863 return true;
1864
1865 default:
1866 return false;
1867 }
1868}
1869
1870
56cf8686
SP
1871/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1872 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1873 *OVERLAPS_B are initialized to the functions that describe the
1874 relation between the elements accessed twice by CHREC_A and
1875 CHREC_B. For k >= 0, the following property is verified:
1876
1877 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1878
1879static void
b8698a0f 1880analyze_siv_subscript_cst_affine (tree chrec_a,
56cf8686 1881 tree chrec_b,
b8698a0f
L
1882 conflict_function **overlaps_a,
1883 conflict_function **overlaps_b,
86df10e3 1884 tree *last_conflicts)
56cf8686
SP
1885{
1886 bool value0, value1, value2;
33b30201 1887 tree type, difference, tmp;
e2157b49 1888
33b30201 1889 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
1890 chrec_a = chrec_convert (type, chrec_a, NULL);
1891 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 1892 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
b8698a0f 1893
5f1fab58
RG
1894 /* Special case overlap in the first iteration. */
1895 if (integer_zerop (difference))
1896 {
1897 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1898 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1899 *last_conflicts = integer_one_node;
1900 return;
1901 }
1902
56cf8686
SP
1903 if (!chrec_is_positive (initial_condition (difference), &value0))
1904 {
0ff4040e 1905 if (dump_file && (dump_flags & TDF_DETAILS))
b8698a0f 1906 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
0ff4040e
SP
1907
1908 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
1909 *overlaps_a = conflict_fn_not_known ();
1910 *overlaps_b = conflict_fn_not_known ();
86df10e3 1911 *last_conflicts = chrec_dont_know;
56cf8686
SP
1912 return;
1913 }
1914 else
1915 {
1916 if (value0 == false)
1917 {
1918 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1919 {
0ff4040e
SP
1920 if (dump_file && (dump_flags & TDF_DETAILS))
1921 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1922
d93817c4 1923 *overlaps_a = conflict_fn_not_known ();
b8698a0f 1924 *overlaps_b = conflict_fn_not_known ();
86df10e3 1925 *last_conflicts = chrec_dont_know;
0ff4040e 1926 dependence_stats.num_siv_unimplemented++;
56cf8686
SP
1927 return;
1928 }
1929 else
1930 {
1931 if (value1 == true)
1932 {
b8698a0f 1933 /* Example:
56cf8686
SP
1934 chrec_a = 12
1935 chrec_b = {10, +, 1}
1936 */
b8698a0f 1937
f457cf40 1938 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
56cf8686 1939 {
4839cb59
ZD
1940 HOST_WIDE_INT numiter;
1941 struct loop *loop = get_chrec_loop (chrec_b);
416f403e 1942
d93817c4 1943 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
33b30201
SP
1944 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1945 fold_build1 (ABS_EXPR, type, difference),
d93817c4
ZD
1946 CHREC_RIGHT (chrec_b));
1947 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
86df10e3 1948 *last_conflicts = integer_one_node;
b8698a0f 1949
416f403e
DB
1950
1951 /* Perform weak-zero siv test to see if overlap is
1952 outside the loop bounds. */
652c4c71 1953 numiter = max_stmt_executions_int (loop);
416f403e 1954
4839cb59
ZD
1955 if (numiter >= 0
1956 && compare_tree_int (tmp, numiter) > 0)
416f403e 1957 {
d93817c4
ZD
1958 free_conflict_function (*overlaps_a);
1959 free_conflict_function (*overlaps_b);
1960 *overlaps_a = conflict_fn_no_dependence ();
1961 *overlaps_b = conflict_fn_no_dependence ();
416f403e 1962 *last_conflicts = integer_zero_node;
0ff4040e 1963 dependence_stats.num_siv_independent++;
416f403e 1964 return;
b8698a0f 1965 }
0ff4040e 1966 dependence_stats.num_siv_dependent++;
56cf8686
SP
1967 return;
1968 }
b8698a0f 1969
f457cf40 1970 /* When the step does not divide the difference, there are
56cf8686
SP
1971 no overlaps. */
1972 else
1973 {
d93817c4 1974 *overlaps_a = conflict_fn_no_dependence ();
b8698a0f 1975 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1976 *last_conflicts = integer_zero_node;
0ff4040e 1977 dependence_stats.num_siv_independent++;
56cf8686
SP
1978 return;
1979 }
1980 }
b8698a0f 1981
56cf8686
SP
1982 else
1983 {
b8698a0f 1984 /* Example:
56cf8686
SP
1985 chrec_a = 12
1986 chrec_b = {10, +, -1}
b8698a0f 1987
56cf8686 1988 In this case, chrec_a will not overlap with chrec_b. */
d93817c4
ZD
1989 *overlaps_a = conflict_fn_no_dependence ();
1990 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1991 *last_conflicts = integer_zero_node;
0ff4040e 1992 dependence_stats.num_siv_independent++;
56cf8686
SP
1993 return;
1994 }
1995 }
1996 }
b8698a0f 1997 else
56cf8686
SP
1998 {
1999 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2000 {
0ff4040e
SP
2001 if (dump_file && (dump_flags & TDF_DETAILS))
2002 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2003
d93817c4 2004 *overlaps_a = conflict_fn_not_known ();
b8698a0f 2005 *overlaps_b = conflict_fn_not_known ();
86df10e3 2006 *last_conflicts = chrec_dont_know;
0ff4040e 2007 dependence_stats.num_siv_unimplemented++;
56cf8686
SP
2008 return;
2009 }
2010 else
2011 {
2012 if (value2 == false)
2013 {
b8698a0f 2014 /* Example:
56cf8686
SP
2015 chrec_a = 3
2016 chrec_b = {10, +, -1}
2017 */
f457cf40 2018 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
56cf8686 2019 {
4839cb59
ZD
2020 HOST_WIDE_INT numiter;
2021 struct loop *loop = get_chrec_loop (chrec_b);
416f403e 2022
d93817c4 2023 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
33b30201 2024 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
d93817c4
ZD
2025 CHREC_RIGHT (chrec_b));
2026 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
86df10e3 2027 *last_conflicts = integer_one_node;
416f403e
DB
2028
2029 /* Perform weak-zero siv test to see if overlap is
2030 outside the loop bounds. */
652c4c71 2031 numiter = max_stmt_executions_int (loop);
416f403e 2032
4839cb59
ZD
2033 if (numiter >= 0
2034 && compare_tree_int (tmp, numiter) > 0)
416f403e 2035 {
d93817c4
ZD
2036 free_conflict_function (*overlaps_a);
2037 free_conflict_function (*overlaps_b);
2038 *overlaps_a = conflict_fn_no_dependence ();
2039 *overlaps_b = conflict_fn_no_dependence ();
416f403e 2040 *last_conflicts = integer_zero_node;
0ff4040e 2041 dependence_stats.num_siv_independent++;
416f403e 2042 return;
b8698a0f 2043 }
0ff4040e 2044 dependence_stats.num_siv_dependent++;
56cf8686
SP
2045 return;
2046 }
b8698a0f 2047
4286d8ce 2048 /* When the step does not divide the difference, there
56cf8686
SP
2049 are no overlaps. */
2050 else
2051 {
d93817c4 2052 *overlaps_a = conflict_fn_no_dependence ();
b8698a0f 2053 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2054 *last_conflicts = integer_zero_node;
0ff4040e 2055 dependence_stats.num_siv_independent++;
56cf8686
SP
2056 return;
2057 }
2058 }
2059 else
2060 {
b8698a0f
L
2061 /* Example:
2062 chrec_a = 3
56cf8686 2063 chrec_b = {4, +, 1}
b8698a0f 2064
56cf8686 2065 In this case, chrec_a will not overlap with chrec_b. */
d93817c4
ZD
2066 *overlaps_a = conflict_fn_no_dependence ();
2067 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2068 *last_conflicts = integer_zero_node;
0ff4040e 2069 dependence_stats.num_siv_independent++;
56cf8686
SP
2070 return;
2071 }
2072 }
2073 }
2074 }
2075}
2076
50300b4c 2077/* Helper recursive function for initializing the matrix A. Returns
86df10e3 2078 the initial value of CHREC. */
56cf8686 2079
5b78fc3e 2080static tree
86df10e3
SP
2081initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2082{
2083 gcc_assert (chrec);
2084
5b78fc3e
JS
2085 switch (TREE_CODE (chrec))
2086 {
2087 case POLYNOMIAL_CHREC:
2088 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2089
2090 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2091 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2092
2093 case PLUS_EXPR:
2094 case MULT_EXPR:
2095 case MINUS_EXPR:
2096 {
2097 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2098 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2099
2100 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2101 }
2102
625a9766 2103 CASE_CONVERT:
5b78fc3e
JS
2104 {
2105 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
726a989a 2106 return chrec_convert (chrec_type (chrec), op, NULL);
5b78fc3e
JS
2107 }
2108
418df9d7
JJ
2109 case BIT_NOT_EXPR:
2110 {
2111 /* Handle ~X as -1 - X. */
2112 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2113 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2114 build_int_cst (TREE_TYPE (chrec), -1), op);
2115 }
2116
5b78fc3e
JS
2117 case INTEGER_CST:
2118 return chrec;
a1a5996d 2119
5b78fc3e
JS
2120 default:
2121 gcc_unreachable ();
2122 return NULL_TREE;
2123 }
86df10e3
SP
2124}
2125
2126#define FLOOR_DIV(x,y) ((x) / (y))
2127
b8698a0f 2128/* Solves the special case of the Diophantine equation:
86df10e3
SP
2129 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2130
2131 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2132 number of iterations that loops X and Y run. The overlaps will be
2133 constructed as evolutions in dimension DIM. */
56cf8686
SP
2134
2135static void
b8698a0f 2136compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
d93817c4 2137 affine_fn *overlaps_a,
b8698a0f 2138 affine_fn *overlaps_b,
86df10e3
SP
2139 tree *last_conflicts, int dim)
2140{
2141 if (((step_a > 0 && step_b > 0)
2142 || (step_a < 0 && step_b < 0)))
2143 {
2144 int step_overlaps_a, step_overlaps_b;
2145 int gcd_steps_a_b, last_conflict, tau2;
2146
2147 gcd_steps_a_b = gcd (step_a, step_b);
2148 step_overlaps_a = step_b / gcd_steps_a_b;
2149 step_overlaps_b = step_a / gcd_steps_a_b;
2150
2c26cbfd
SP
2151 if (niter > 0)
2152 {
2153 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2154 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2155 last_conflict = tau2;
2156 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2157 }
2158 else
2159 *last_conflicts = chrec_dont_know;
86df10e3 2160
b8698a0f 2161 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
d93817c4
ZD
2162 build_int_cst (NULL_TREE,
2163 step_overlaps_a));
b8698a0f
L
2164 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2165 build_int_cst (NULL_TREE,
d93817c4 2166 step_overlaps_b));
86df10e3
SP
2167 }
2168
2169 else
2170 {
d93817c4
ZD
2171 *overlaps_a = affine_fn_cst (integer_zero_node);
2172 *overlaps_b = affine_fn_cst (integer_zero_node);
86df10e3
SP
2173 *last_conflicts = integer_zero_node;
2174 }
2175}
2176
86df10e3
SP
2177/* Solves the special case of a Diophantine equation where CHREC_A is
2178 an affine bivariate function, and CHREC_B is an affine univariate
b8698a0f 2179 function. For example,
86df10e3
SP
2180
2181 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
b8698a0f
L
2182
2183 has the following overlapping functions:
86df10e3
SP
2184
2185 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2186 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2187 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2188
35fd3193 2189 FORNOW: This is a specialized implementation for a case occurring in
86df10e3
SP
2190 a common benchmark. Implement the general algorithm. */
2191
2192static void
b8698a0f 2193compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
d93817c4 2194 conflict_function **overlaps_a,
b8698a0f 2195 conflict_function **overlaps_b,
86df10e3 2196 tree *last_conflicts)
56cf8686 2197{
86df10e3
SP
2198 bool xz_p, yz_p, xyz_p;
2199 int step_x, step_y, step_z;
4839cb59 2200 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
d93817c4
ZD
2201 affine_fn overlaps_a_xz, overlaps_b_xz;
2202 affine_fn overlaps_a_yz, overlaps_b_yz;
2203 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2204 affine_fn ova1, ova2, ovb;
2205 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
86df10e3 2206
6b6fa4e9
SP
2207 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2208 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2209 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
86df10e3 2210
652c4c71
RG
2211 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2212 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2213 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
b8698a0f 2214
4839cb59 2215 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
86df10e3 2216 {
0ff4040e
SP
2217 if (dump_file && (dump_flags & TDF_DETAILS))
2218 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
b8698a0f 2219
d93817c4
ZD
2220 *overlaps_a = conflict_fn_not_known ();
2221 *overlaps_b = conflict_fn_not_known ();
86df10e3
SP
2222 *last_conflicts = chrec_dont_know;
2223 return;
2224 }
2225
86df10e3
SP
2226 niter = MIN (niter_x, niter_z);
2227 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2228 &overlaps_a_xz,
2229 &overlaps_b_xz,
2230 &last_conflicts_xz, 1);
2231 niter = MIN (niter_y, niter_z);
2232 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2233 &overlaps_a_yz,
2234 &overlaps_b_yz,
2235 &last_conflicts_yz, 2);
2236 niter = MIN (niter_x, niter_z);
2237 niter = MIN (niter_y, niter);
2238 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2239 &overlaps_a_xyz,
2240 &overlaps_b_xyz,
2241 &last_conflicts_xyz, 3);
2242
2243 xz_p = !integer_zerop (last_conflicts_xz);
2244 yz_p = !integer_zerop (last_conflicts_yz);
2245 xyz_p = !integer_zerop (last_conflicts_xyz);
2246
2247 if (xz_p || yz_p || xyz_p)
2248 {
d93817c4
ZD
2249 ova1 = affine_fn_cst (integer_zero_node);
2250 ova2 = affine_fn_cst (integer_zero_node);
2251 ovb = affine_fn_cst (integer_zero_node);
86df10e3
SP
2252 if (xz_p)
2253 {
d93817c4
ZD
2254 affine_fn t0 = ova1;
2255 affine_fn t2 = ovb;
2256
2257 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2258 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2259 affine_fn_free (t0);
2260 affine_fn_free (t2);
86df10e3
SP
2261 *last_conflicts = last_conflicts_xz;
2262 }
2263 if (yz_p)
2264 {
d93817c4
ZD
2265 affine_fn t0 = ova2;
2266 affine_fn t2 = ovb;
2267
2268 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2269 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2270 affine_fn_free (t0);
2271 affine_fn_free (t2);
86df10e3
SP
2272 *last_conflicts = last_conflicts_yz;
2273 }
2274 if (xyz_p)
2275 {
d93817c4
ZD
2276 affine_fn t0 = ova1;
2277 affine_fn t2 = ova2;
2278 affine_fn t4 = ovb;
2279
2280 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2281 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2282 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2283 affine_fn_free (t0);
2284 affine_fn_free (t2);
2285 affine_fn_free (t4);
86df10e3
SP
2286 *last_conflicts = last_conflicts_xyz;
2287 }
d93817c4
ZD
2288 *overlaps_a = conflict_fn (2, ova1, ova2);
2289 *overlaps_b = conflict_fn (1, ovb);
86df10e3
SP
2290 }
2291 else
2292 {
d93817c4
ZD
2293 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2294 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
86df10e3
SP
2295 *last_conflicts = integer_zero_node;
2296 }
d93817c4
ZD
2297
2298 affine_fn_free (overlaps_a_xz);
2299 affine_fn_free (overlaps_b_xz);
2300 affine_fn_free (overlaps_a_yz);
2301 affine_fn_free (overlaps_b_yz);
2302 affine_fn_free (overlaps_a_xyz);
2303 affine_fn_free (overlaps_b_xyz);
56cf8686
SP
2304}
2305
b305e3da
SP
2306/* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2307
2308static void
2309lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2310 int size)
2311{
2312 memcpy (vec2, vec1, size * sizeof (*vec1));
2313}
2314
2315/* Copy the elements of M x N matrix MAT1 to MAT2. */
2316
2317static void
2318lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2319 int m, int n)
2320{
2321 int i;
2322
2323 for (i = 0; i < m; i++)
2324 lambda_vector_copy (mat1[i], mat2[i], n);
2325}
2326
2327/* Store the N x N identity matrix in MAT. */
2328
2329static void
2330lambda_matrix_id (lambda_matrix mat, int size)
2331{
2332 int i, j;
2333
2334 for (i = 0; i < size; i++)
2335 for (j = 0; j < size; j++)
2336 mat[i][j] = (i == j) ? 1 : 0;
2337}
2338
2339/* Return the first nonzero element of vector VEC1 between START and N.
2340 We must have START <= N. Returns N if VEC1 is the zero vector. */
2341
2342static int
2343lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2344{
2345 int j = start;
2346 while (j < n && vec1[j] == 0)
2347 j++;
2348 return j;
2349}
2350
2351/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2352 R2 = R2 + CONST1 * R1. */
2353
2354static void
2355lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2356{
2357 int i;
2358
2359 if (const1 == 0)
2360 return;
2361
2362 for (i = 0; i < n; i++)
2363 mat[r2][i] += const1 * mat[r1][i];
2364}
2365
2366/* Swap rows R1 and R2 in matrix MAT. */
2367
2368static void
2369lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2370{
2371 lambda_vector row;
2372
2373 row = mat[r1];
2374 mat[r1] = mat[r2];
2375 mat[r2] = row;
2376}
2377
2378/* Multiply vector VEC1 of length SIZE by a constant CONST1,
2379 and store the result in VEC2. */
2380
2381static void
2382lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2383 int size, int const1)
2384{
2385 int i;
2386
2387 if (const1 == 0)
2388 lambda_vector_clear (vec2, size);
2389 else
2390 for (i = 0; i < size; i++)
2391 vec2[i] = const1 * vec1[i];
2392}
2393
2394/* Negate vector VEC1 with length SIZE and store it in VEC2. */
2395
2396static void
2397lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2398 int size)
2399{
2400 lambda_vector_mult_const (vec1, vec2, size, -1);
2401}
2402
2403/* Negate row R1 of matrix MAT which has N columns. */
2404
2405static void
2406lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2407{
2408 lambda_vector_negate (mat[r1], mat[r1], n);
2409}
2410
2411/* Return true if two vectors are equal. */
2412
2413static bool
2414lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2415{
2416 int i;
2417 for (i = 0; i < size; i++)
2418 if (vec1[i] != vec2[i])
2419 return false;
2420 return true;
2421}
2422
2423/* Given an M x N integer matrix A, this function determines an M x
2424 M unimodular matrix U, and an M x N echelon matrix S such that
2425 "U.A = S". This decomposition is also known as "right Hermite".
2426
2427 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2428 Restructuring Compilers" Utpal Banerjee. */
2429
2430static void
2431lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2432 lambda_matrix S, lambda_matrix U)
2433{
2434 int i, j, i0 = 0;
2435
2436 lambda_matrix_copy (A, S, m, n);
2437 lambda_matrix_id (U, m);
2438
2439 for (j = 0; j < n; j++)
2440 {
2441 if (lambda_vector_first_nz (S[j], m, i0) < m)
2442 {
2443 ++i0;
2444 for (i = m - 1; i >= i0; i--)
2445 {
2446 while (S[i][j] != 0)
2447 {
2448 int sigma, factor, a, b;
2449
2450 a = S[i-1][j];
2451 b = S[i][j];
2452 sigma = (a * b < 0) ? -1: 1;
2453 a = abs (a);
2454 b = abs (b);
2455 factor = sigma * (a / b);
2456
2457 lambda_matrix_row_add (S, n, i, i-1, -factor);
2458 lambda_matrix_row_exchange (S, i, i-1);
2459
2460 lambda_matrix_row_add (U, m, i, i-1, -factor);
2461 lambda_matrix_row_exchange (U, i, i-1);
2462 }
2463 }
2464 }
2465 }
2466}
2467
56cf8686 2468/* Determines the overlapping elements due to accesses CHREC_A and
0ff4040e
SP
2469 CHREC_B, that are affine functions. This function cannot handle
2470 symbolic evolution functions, ie. when initial conditions are
2471 parameters, because it uses lambda matrices of integers. */
56cf8686
SP
2472
2473static void
b8698a0f 2474analyze_subscript_affine_affine (tree chrec_a,
56cf8686 2475 tree chrec_b,
b8698a0f
L
2476 conflict_function **overlaps_a,
2477 conflict_function **overlaps_b,
86df10e3 2478 tree *last_conflicts)
56cf8686 2479{
86df10e3 2480 unsigned nb_vars_a, nb_vars_b, dim;
fd727b34 2481 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
86df10e3 2482 lambda_matrix A, U, S;
f873b205 2483 struct obstack scratch_obstack;
86df10e3 2484
e2157b49 2485 if (eq_evolutions_p (chrec_a, chrec_b))
416f403e 2486 {
e2157b49
SP
2487 /* The accessed index overlaps for each iteration in the
2488 loop. */
d93817c4
ZD
2489 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2490 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
416f403e
DB
2491 *last_conflicts = chrec_dont_know;
2492 return;
2493 }
56cf8686
SP
2494 if (dump_file && (dump_flags & TDF_DETAILS))
2495 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
b8698a0f 2496
56cf8686
SP
2497 /* For determining the initial intersection, we have to solve a
2498 Diophantine equation. This is the most time consuming part.
b8698a0f 2499
56cf8686
SP
2500 For answering to the question: "Is there a dependence?" we have
2501 to prove that there exists a solution to the Diophantine
2502 equation, and that the solution is in the iteration domain,
89dbed81 2503 i.e. the solution is positive or zero, and that the solution
56cf8686
SP
2504 happens before the upper bound loop.nb_iterations. Otherwise
2505 there is no dependence. This function outputs a description of
2506 the iterations that hold the intersections. */
2507
86df10e3
SP
2508 nb_vars_a = nb_vars_in_chrec (chrec_a);
2509 nb_vars_b = nb_vars_in_chrec (chrec_b);
2510
f873b205
LB
2511 gcc_obstack_init (&scratch_obstack);
2512
86df10e3 2513 dim = nb_vars_a + nb_vars_b;
f873b205
LB
2514 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2515 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2516 S = lambda_matrix_new (dim, 1, &scratch_obstack);
86df10e3 2517
5b78fc3e
JS
2518 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2519 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
86df10e3
SP
2520 gamma = init_b - init_a;
2521
2522 /* Don't do all the hard work of solving the Diophantine equation
b8698a0f 2523 when we already know the solution: for example,
86df10e3
SP
2524 | {3, +, 1}_1
2525 | {3, +, 4}_2
2526 | gamma = 3 - 3 = 0.
b8698a0f 2527 Then the first overlap occurs during the first iterations:
86df10e3
SP
2528 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2529 */
2530 if (gamma == 0)
56cf8686 2531 {
86df10e3 2532 if (nb_vars_a == 1 && nb_vars_b == 1)
56cf8686 2533 {
fd727b34 2534 HOST_WIDE_INT step_a, step_b;
4839cb59 2535 HOST_WIDE_INT niter, niter_a, niter_b;
d93817c4 2536 affine_fn ova, ovb;
86df10e3 2537
652c4c71
RG
2538 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2539 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
86df10e3 2540 niter = MIN (niter_a, niter_b);
6b6fa4e9
SP
2541 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2542 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
86df10e3 2543
b8698a0f
L
2544 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2545 &ova, &ovb,
86df10e3 2546 last_conflicts, 1);
d93817c4
ZD
2547 *overlaps_a = conflict_fn (1, ova);
2548 *overlaps_b = conflict_fn (1, ovb);
56cf8686 2549 }
86df10e3
SP
2550
2551 else if (nb_vars_a == 2 && nb_vars_b == 1)
2552 compute_overlap_steps_for_affine_1_2
2553 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2554
2555 else if (nb_vars_a == 1 && nb_vars_b == 2)
2556 compute_overlap_steps_for_affine_1_2
2557 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2558
2559 else
56cf8686 2560 {
0ff4040e
SP
2561 if (dump_file && (dump_flags & TDF_DETAILS))
2562 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
d93817c4
ZD
2563 *overlaps_a = conflict_fn_not_known ();
2564 *overlaps_b = conflict_fn_not_known ();
86df10e3 2565 *last_conflicts = chrec_dont_know;
56cf8686 2566 }
0ff4040e 2567 goto end_analyze_subs_aa;
86df10e3
SP
2568 }
2569
2570 /* U.A = S */
2571 lambda_matrix_right_hermite (A, dim, 1, S, U);
2572
2573 if (S[0][0] < 0)
2574 {
2575 S[0][0] *= -1;
2576 lambda_matrix_row_negate (U, dim, 0);
2577 }
2578 gcd_alpha_beta = S[0][0];
2579
ba42e045
SP
2580 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2581 but that is a quite strange case. Instead of ICEing, answer
2582 don't know. */
2583 if (gcd_alpha_beta == 0)
2584 {
d93817c4
ZD
2585 *overlaps_a = conflict_fn_not_known ();
2586 *overlaps_b = conflict_fn_not_known ();
ba42e045
SP
2587 *last_conflicts = chrec_dont_know;
2588 goto end_analyze_subs_aa;
2589 }
2590
86df10e3
SP
2591 /* The classic "gcd-test". */
2592 if (!int_divides_p (gcd_alpha_beta, gamma))
2593 {
2594 /* The "gcd-test" has determined that there is no integer
2595 solution, i.e. there is no dependence. */
d93817c4
ZD
2596 *overlaps_a = conflict_fn_no_dependence ();
2597 *overlaps_b = conflict_fn_no_dependence ();
86df10e3
SP
2598 *last_conflicts = integer_zero_node;
2599 }
2600
2601 /* Both access functions are univariate. This includes SIV and MIV cases. */
2602 else if (nb_vars_a == 1 && nb_vars_b == 1)
2603 {
2604 /* Both functions should have the same evolution sign. */
2605 if (((A[0][0] > 0 && -A[1][0] > 0)
2606 || (A[0][0] < 0 && -A[1][0] < 0)))
56cf8686
SP
2607 {
2608 /* The solutions are given by:
b8698a0f 2609 |
86df10e3
SP
2610 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2611 | [u21 u22] [y0]
b8698a0f 2612
56cf8686 2613 For a given integer t. Using the following variables,
b8698a0f 2614
56cf8686
SP
2615 | i0 = u11 * gamma / gcd_alpha_beta
2616 | j0 = u12 * gamma / gcd_alpha_beta
2617 | i1 = u21
2618 | j1 = u22
b8698a0f 2619
56cf8686 2620 the solutions are:
b8698a0f
L
2621
2622 | x0 = i0 + i1 * t,
86df10e3 2623 | y0 = j0 + j1 * t. */
2c26cbfd 2624 HOST_WIDE_INT i0, j0, i1, j1;
86df10e3
SP
2625
2626 i0 = U[0][0] * gamma / gcd_alpha_beta;
2627 j0 = U[0][1] * gamma / gcd_alpha_beta;
2628 i1 = U[1][0];
2629 j1 = U[1][1];
2630
2631 if ((i1 == 0 && i0 < 0)
2632 || (j1 == 0 && j0 < 0))
56cf8686 2633 {
b8698a0f
L
2634 /* There is no solution.
2635 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2636 falls in here, but for the moment we don't look at the
56cf8686 2637 upper bound of the iteration domain. */
d93817c4
ZD
2638 *overlaps_a = conflict_fn_no_dependence ();
2639 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2640 *last_conflicts = integer_zero_node;
2c26cbfd 2641 goto end_analyze_subs_aa;
86df10e3
SP
2642 }
2643
2c26cbfd 2644 if (i1 > 0 && j1 > 0)
56cf8686 2645 {
652c4c71
RG
2646 HOST_WIDE_INT niter_a
2647 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2648 HOST_WIDE_INT niter_b
2649 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2c26cbfd
SP
2650 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2651
2652 /* (X0, Y0) is a solution of the Diophantine equation:
2653 "chrec_a (X0) = chrec_b (Y0)". */
2654 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2655 CEIL (-j0, j1));
2656 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2657 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2658
2659 /* (X1, Y1) is the smallest positive solution of the eq
2660 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2661 first conflict occurs. */
2662 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2663 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2664 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2665
2666 if (niter > 0)
56cf8686 2667 {
2c26cbfd
SP
2668 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2669 FLOOR_DIV (niter - j0, j1));
2670 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
86df10e3 2671
2c26cbfd
SP
2672 /* If the overlap occurs outside of the bounds of the
2673 loop, there is no dependence. */
9e517d61 2674 if (x1 >= niter || y1 >= niter)
56cf8686 2675 {
2c26cbfd
SP
2676 *overlaps_a = conflict_fn_no_dependence ();
2677 *overlaps_b = conflict_fn_no_dependence ();
2678 *last_conflicts = integer_zero_node;
2679 goto end_analyze_subs_aa;
56cf8686
SP
2680 }
2681 else
2c26cbfd 2682 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
56cf8686 2683 }
56cf8686 2684 else
2c26cbfd
SP
2685 *last_conflicts = chrec_dont_know;
2686
2687 *overlaps_a
2688 = conflict_fn (1,
2689 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2690 1,
2691 build_int_cst (NULL_TREE, i1)));
2692 *overlaps_b
2693 = conflict_fn (1,
2694 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2695 1,
2696 build_int_cst (NULL_TREE, j1)));
2697 }
2698 else
2699 {
2700 /* FIXME: For the moment, the upper bound of the
2701 iteration domain for i and j is not checked. */
2702 if (dump_file && (dump_flags & TDF_DETAILS))
2703 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2704 *overlaps_a = conflict_fn_not_known ();
2705 *overlaps_b = conflict_fn_not_known ();
2706 *last_conflicts = chrec_dont_know;
56cf8686
SP
2707 }
2708 }
86df10e3
SP
2709 else
2710 {
0ff4040e
SP
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2712 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
d93817c4
ZD
2713 *overlaps_a = conflict_fn_not_known ();
2714 *overlaps_b = conflict_fn_not_known ();
86df10e3
SP
2715 *last_conflicts = chrec_dont_know;
2716 }
56cf8686 2717 }
56cf8686
SP
2718 else
2719 {
0ff4040e
SP
2720 if (dump_file && (dump_flags & TDF_DETAILS))
2721 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
d93817c4
ZD
2722 *overlaps_a = conflict_fn_not_known ();
2723 *overlaps_b = conflict_fn_not_known ();
86df10e3 2724 *last_conflicts = chrec_dont_know;
56cf8686 2725 }
86df10e3 2726
b8698a0f 2727end_analyze_subs_aa:
f873b205 2728 obstack_free (&scratch_obstack, NULL);
56cf8686
SP
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 {
2731 fprintf (dump_file, " (overlaps_a = ");
d93817c4 2732 dump_conflict_function (dump_file, *overlaps_a);
56cf8686 2733 fprintf (dump_file, ")\n (overlaps_b = ");
d93817c4 2734 dump_conflict_function (dump_file, *overlaps_b);
bcf1ef00 2735 fprintf (dump_file, "))\n");
56cf8686 2736 }
0ff4040e
SP
2737}
2738
2739/* Returns true when analyze_subscript_affine_affine can be used for
2740 determining the dependence relation between chrec_a and chrec_b,
2741 that contain symbols. This function modifies chrec_a and chrec_b
2742 such that the analysis result is the same, and such that they don't
b8698a0f 2743 contain symbols, and then can safely be passed to the analyzer.
0ff4040e
SP
2744
2745 Example: The analysis of the following tuples of evolutions produce
2746 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2747 vs. {0, +, 1}_1
b8698a0f 2748
0ff4040e
SP
2749 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2750 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2751*/
2752
2753static bool
2754can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2755{
16a2acea 2756 tree diff, type, left_a, left_b, right_b;
0ff4040e
SP
2757
2758 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2759 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2760 /* FIXME: For the moment not handled. Might be refined later. */
2761 return false;
2762
16a2acea
SP
2763 type = chrec_type (*chrec_a);
2764 left_a = CHREC_LEFT (*chrec_a);
726a989a 2765 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
16a2acea
SP
2766 diff = chrec_fold_minus (type, left_a, left_b);
2767
0ff4040e
SP
2768 if (!evolution_function_is_constant_p (diff))
2769 return false;
2770
56cf8686 2771 if (dump_file && (dump_flags & TDF_DETAILS))
0ff4040e
SP
2772 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2773
b8698a0f 2774 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
0ff4040e 2775 diff, CHREC_RIGHT (*chrec_a));
726a989a 2776 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
0ff4040e 2777 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
dc61cc6b 2778 build_int_cst (type, 0),
16a2acea 2779 right_b);
0ff4040e 2780 return true;
56cf8686
SP
2781}
2782
2783/* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2784 *OVERLAPS_B are initialized to the functions that describe the
2785 relation between the elements accessed twice by CHREC_A and
2786 CHREC_B. For k >= 0, the following property is verified:
2787
2788 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2789
2790static void
b8698a0f 2791analyze_siv_subscript (tree chrec_a,
56cf8686 2792 tree chrec_b,
b8698a0f
L
2793 conflict_function **overlaps_a,
2794 conflict_function **overlaps_b,
5b78fc3e
JS
2795 tree *last_conflicts,
2796 int loop_nest_num)
56cf8686 2797{
0ff4040e 2798 dependence_stats.num_siv++;
b8698a0f 2799
56cf8686
SP
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 fprintf (dump_file, "(analyze_siv_subscript \n");
b8698a0f 2802
56cf8686 2803 if (evolution_function_is_constant_p (chrec_a)
5b78fc3e 2804 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
b8698a0f 2805 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
86df10e3 2806 overlaps_a, overlaps_b, last_conflicts);
b8698a0f 2807
5b78fc3e 2808 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
56cf8686 2809 && evolution_function_is_constant_p (chrec_b))
b8698a0f 2810 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
86df10e3 2811 overlaps_b, overlaps_a, last_conflicts);
b8698a0f 2812
5b78fc3e
JS
2813 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2814 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
0ff4040e
SP
2815 {
2816 if (!chrec_contains_symbols (chrec_a)
2817 && !chrec_contains_symbols (chrec_b))
2818 {
b8698a0f
L
2819 analyze_subscript_affine_affine (chrec_a, chrec_b,
2820 overlaps_a, overlaps_b,
0ff4040e
SP
2821 last_conflicts);
2822
d93817c4
ZD
2823 if (CF_NOT_KNOWN_P (*overlaps_a)
2824 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2825 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
2826 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2827 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2828 dependence_stats.num_siv_independent++;
2829 else
2830 dependence_stats.num_siv_dependent++;
2831 }
b8698a0f 2832 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
0ff4040e
SP
2833 &chrec_b))
2834 {
b8698a0f
L
2835 analyze_subscript_affine_affine (chrec_a, chrec_b,
2836 overlaps_a, overlaps_b,
0ff4040e 2837 last_conflicts);
0ff4040e 2838
d93817c4
ZD
2839 if (CF_NOT_KNOWN_P (*overlaps_a)
2840 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2841 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
2842 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2843 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2844 dependence_stats.num_siv_independent++;
2845 else
2846 dependence_stats.num_siv_dependent++;
2847 }
2848 else
2849 goto siv_subscript_dontknow;
2850 }
2851
56cf8686
SP
2852 else
2853 {
0ff4040e
SP
2854 siv_subscript_dontknow:;
2855 if (dump_file && (dump_flags & TDF_DETAILS))
bcf1ef00 2856 fprintf (dump_file, " siv test failed: unimplemented");
d93817c4
ZD
2857 *overlaps_a = conflict_fn_not_known ();
2858 *overlaps_b = conflict_fn_not_known ();
86df10e3 2859 *last_conflicts = chrec_dont_know;
0ff4040e 2860 dependence_stats.num_siv_unimplemented++;
56cf8686 2861 }
b8698a0f 2862
56cf8686
SP
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2864 fprintf (dump_file, ")\n");
2865}
2866
55a700ac
ZD
2867/* Returns false if we can prove that the greatest common divisor of the steps
2868 of CHREC does not divide CST, false otherwise. */
56cf8686
SP
2869
2870static bool
ed7a4b4b 2871gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
56cf8686 2872{
55a700ac
ZD
2873 HOST_WIDE_INT cd = 0, val;
2874 tree step;
0ff4040e 2875
9541ffee 2876 if (!tree_fits_shwi_p (cst))
55a700ac 2877 return true;
9439e9a1 2878 val = tree_to_shwi (cst);
55a700ac
ZD
2879
2880 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2881 {
2882 step = CHREC_RIGHT (chrec);
9541ffee 2883 if (!tree_fits_shwi_p (step))
55a700ac 2884 return true;
9439e9a1 2885 cd = gcd (cd, tree_to_shwi (step));
55a700ac 2886 chrec = CHREC_LEFT (chrec);
56cf8686 2887 }
55a700ac
ZD
2888
2889 return val % cd == 0;
56cf8686
SP
2890}
2891
da9a21f4
SP
2892/* Analyze a MIV (Multiple Index Variable) subscript with respect to
2893 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2894 functions that describe the relation between the elements accessed
2895 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2896 is verified:
56cf8686
SP
2897
2898 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2899
2900static void
b8698a0f
L
2901analyze_miv_subscript (tree chrec_a,
2902 tree chrec_b,
2903 conflict_function **overlaps_a,
2904 conflict_function **overlaps_b,
da9a21f4
SP
2905 tree *last_conflicts,
2906 struct loop *loop_nest)
56cf8686 2907{
33b30201
SP
2908 tree type, difference;
2909
0ff4040e 2910 dependence_stats.num_miv++;
56cf8686
SP
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2912 fprintf (dump_file, "(analyze_miv_subscript \n");
e2157b49 2913
33b30201 2914 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
2915 chrec_a = chrec_convert (type, chrec_a, NULL);
2916 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 2917 difference = chrec_fold_minus (type, chrec_a, chrec_b);
b8698a0f 2918
e2157b49 2919 if (eq_evolutions_p (chrec_a, chrec_b))
56cf8686
SP
2920 {
2921 /* Access functions are the same: all the elements are accessed
2922 in the same order. */
d93817c4
ZD
2923 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2924 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
b4a9343c 2925 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
0ff4040e 2926 dependence_stats.num_miv_dependent++;
56cf8686 2927 }
b8698a0f 2928
56cf8686
SP
2929 else if (evolution_function_is_constant_p (difference)
2930 /* For the moment, the following is verified:
da9a21f4
SP
2931 evolution_function_is_affine_multivariate_p (chrec_a,
2932 loop_nest->num) */
55a700ac 2933 && !gcd_of_steps_may_divide_p (chrec_a, difference))
56cf8686
SP
2934 {
2935 /* testsuite/.../ssa-chrec-33.c
b8698a0f
L
2936 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2937
55a700ac
ZD
2938 The difference is 1, and all the evolution steps are multiples
2939 of 2, consequently there are no overlapping elements. */
d93817c4
ZD
2940 *overlaps_a = conflict_fn_no_dependence ();
2941 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2942 *last_conflicts = integer_zero_node;
0ff4040e 2943 dependence_stats.num_miv_independent++;
56cf8686 2944 }
b8698a0f 2945
da9a21f4 2946 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
0ff4040e 2947 && !chrec_contains_symbols (chrec_a)
da9a21f4 2948 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
0ff4040e 2949 && !chrec_contains_symbols (chrec_b))
56cf8686
SP
2950 {
2951 /* testsuite/.../ssa-chrec-35.c
2952 {0, +, 1}_2 vs. {0, +, 1}_3
2953 the overlapping elements are respectively located at iterations:
b8698a0f
L
2954 {0, +, 1}_x and {0, +, 1}_x,
2955 in other words, we have the equality:
86df10e3 2956 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
b8698a0f
L
2957
2958 Other examples:
2959 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
86df10e3
SP
2960 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2961
b8698a0f 2962 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
86df10e3 2963 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
56cf8686 2964 */
b8698a0f 2965 analyze_subscript_affine_affine (chrec_a, chrec_b,
86df10e3 2966 overlaps_a, overlaps_b, last_conflicts);
0ff4040e 2967
d93817c4
ZD
2968 if (CF_NOT_KNOWN_P (*overlaps_a)
2969 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2970 dependence_stats.num_miv_unimplemented++;
d93817c4
ZD
2971 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2972 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2973 dependence_stats.num_miv_independent++;
2974 else
2975 dependence_stats.num_miv_dependent++;
56cf8686 2976 }
b8698a0f 2977
56cf8686
SP
2978 else
2979 {
2980 /* When the analysis is too difficult, answer "don't know". */
0ff4040e
SP
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2982 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2983
d93817c4
ZD
2984 *overlaps_a = conflict_fn_not_known ();
2985 *overlaps_b = conflict_fn_not_known ();
86df10e3 2986 *last_conflicts = chrec_dont_know;
0ff4040e 2987 dependence_stats.num_miv_unimplemented++;
56cf8686 2988 }
b8698a0f 2989
56cf8686
SP
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 fprintf (dump_file, ")\n");
2992}
2993
da9a21f4
SP
2994/* Determines the iterations for which CHREC_A is equal to CHREC_B in
2995 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2996 OVERLAP_ITERATIONS_B are initialized with two functions that
2997 describe the iterations that contain conflicting elements.
b8698a0f 2998
56cf8686 2999 Remark: For an integer k >= 0, the following equality is true:
b8698a0f 3000
56cf8686
SP
3001 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3002*/
3003
b8698a0f
L
3004static void
3005analyze_overlapping_iterations (tree chrec_a,
3006 tree chrec_b,
3007 conflict_function **overlap_iterations_a,
3008 conflict_function **overlap_iterations_b,
da9a21f4 3009 tree *last_conflicts, struct loop *loop_nest)
56cf8686 3010{
da9a21f4
SP
3011 unsigned int lnn = loop_nest->num;
3012
0ff4040e 3013 dependence_stats.num_subscript_tests++;
b8698a0f 3014
56cf8686
SP
3015 if (dump_file && (dump_flags & TDF_DETAILS))
3016 {
3017 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3018 fprintf (dump_file, " (chrec_a = ");
3019 print_generic_expr (dump_file, chrec_a, 0);
0ff4040e 3020 fprintf (dump_file, ")\n (chrec_b = ");
56cf8686
SP
3021 print_generic_expr (dump_file, chrec_b, 0);
3022 fprintf (dump_file, ")\n");
3023 }
0ff4040e 3024
56cf8686
SP
3025 if (chrec_a == NULL_TREE
3026 || chrec_b == NULL_TREE
3027 || chrec_contains_undetermined (chrec_a)
0ff4040e 3028 || chrec_contains_undetermined (chrec_b))
56cf8686 3029 {
0ff4040e 3030 dependence_stats.num_subscript_undetermined++;
b8698a0f 3031
d93817c4
ZD
3032 *overlap_iterations_a = conflict_fn_not_known ();
3033 *overlap_iterations_b = conflict_fn_not_known ();
56cf8686 3034 }
0ff4040e 3035
b8698a0f 3036 /* If they are the same chrec, and are affine, they overlap
0ff4040e
SP
3037 on every iteration. */
3038 else if (eq_evolutions_p (chrec_a, chrec_b)
3e6f8b56
SP
3039 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3040 || operand_equal_p (chrec_a, chrec_b, 0)))
0ff4040e
SP
3041 {
3042 dependence_stats.num_same_subscript_function++;
d93817c4
ZD
3043 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3044 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
0ff4040e
SP
3045 *last_conflicts = chrec_dont_know;
3046 }
3047
3048 /* If they aren't the same, and aren't affine, we can't do anything
3e6f8b56 3049 yet. */
b8698a0f 3050 else if ((chrec_contains_symbols (chrec_a)
0ff4040e 3051 || chrec_contains_symbols (chrec_b))
da9a21f4
SP
3052 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3053 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
0ff4040e
SP
3054 {
3055 dependence_stats.num_subscript_undetermined++;
d93817c4
ZD
3056 *overlap_iterations_a = conflict_fn_not_known ();
3057 *overlap_iterations_b = conflict_fn_not_known ();
0ff4040e
SP
3058 }
3059
56cf8686 3060 else if (ziv_subscript_p (chrec_a, chrec_b))
b8698a0f 3061 analyze_ziv_subscript (chrec_a, chrec_b,
86df10e3
SP
3062 overlap_iterations_a, overlap_iterations_b,
3063 last_conflicts);
b8698a0f 3064
56cf8686 3065 else if (siv_subscript_p (chrec_a, chrec_b))
b8698a0f
L
3066 analyze_siv_subscript (chrec_a, chrec_b,
3067 overlap_iterations_a, overlap_iterations_b,
5b78fc3e 3068 last_conflicts, lnn);
b8698a0f 3069
56cf8686 3070 else
b8698a0f 3071 analyze_miv_subscript (chrec_a, chrec_b,
86df10e3 3072 overlap_iterations_a, overlap_iterations_b,
da9a21f4 3073 last_conflicts, loop_nest);
b8698a0f 3074
56cf8686
SP
3075 if (dump_file && (dump_flags & TDF_DETAILS))
3076 {
3077 fprintf (dump_file, " (overlap_iterations_a = ");
d93817c4 3078 dump_conflict_function (dump_file, *overlap_iterations_a);
56cf8686 3079 fprintf (dump_file, ")\n (overlap_iterations_b = ");
d93817c4 3080 dump_conflict_function (dump_file, *overlap_iterations_b);
bcf1ef00 3081 fprintf (dump_file, "))\n");
56cf8686
SP
3082 }
3083}
3084
ba42e045 3085/* Helper function for uniquely inserting distance vectors. */
56cf8686 3086
ba42e045
SP
3087static void
3088save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3089{
3090 unsigned i;
3091 lambda_vector v;
56cf8686 3092
9771b263 3093 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
ba42e045
SP
3094 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3095 return;
56cf8686 3096
9771b263 3097 DDR_DIST_VECTS (ddr).safe_push (dist_v);
ba42e045 3098}
56cf8686 3099
ba42e045
SP
3100/* Helper function for uniquely inserting direction vectors. */
3101
3102static void
3103save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
56cf8686
SP
3104{
3105 unsigned i;
ba42e045 3106 lambda_vector v;
0ff4040e 3107
9771b263 3108 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
ba42e045
SP
3109 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3110 return;
3111
9771b263 3112 DDR_DIR_VECTS (ddr).safe_push (dir_v);
ba42e045
SP
3113}
3114
3115/* Add a distance of 1 on all the loops outer than INDEX. If we
3116 haven't yet determined a distance for this outer loop, push a new
3117 distance vector composed of the previous distance, and a distance
3118 of 1 for this outer loop. Example:
3119
3120 | loop_1
3121 | loop_2
3122 | A[10]
3123 | endloop_2
3124 | endloop_1
3125
3126 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3127 save (0, 1), then we have to save (1, 0). */
3128
3129static void
3130add_outer_distances (struct data_dependence_relation *ddr,
3131 lambda_vector dist_v, int index)
3132{
3133 /* For each outer loop where init_v is not set, the accesses are
3134 in dependence of distance 1 in the loop. */
3135 while (--index >= 0)
3136 {
3137 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3138 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3139 save_v[index] = 1;
3140 save_dist_v (ddr, save_v);
3141 }
3142}
3143
3144/* Return false when fail to represent the data dependence as a
3145 distance vector. INIT_B is set to true when a component has been
3146 added to the distance vector DIST_V. INDEX_CARRY is then set to
3147 the index in DIST_V that carries the dependence. */
3148
3149static bool
3150build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3151 struct data_reference *ddr_a,
3152 struct data_reference *ddr_b,
3153 lambda_vector dist_v, bool *init_b,
3154 int *index_carry)
3155{
3156 unsigned i;
3157 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
0ff4040e 3158
36d59cf7 3159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
56cf8686 3160 {
86df10e3 3161 tree access_fn_a, access_fn_b;
36d59cf7 3162 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
56cf8686
SP
3163
3164 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
86df10e3
SP
3165 {
3166 non_affine_dependence_relation (ddr);
ba42e045 3167 return false;
86df10e3
SP
3168 }
3169
ba42e045
SP
3170 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3171 access_fn_b = DR_ACCESS_FN (ddr_b, i);
56cf8686 3172
b8698a0f 3173 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
86df10e3 3174 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
56cf8686 3175 {
ba42e045 3176 int dist, index;
a130584a
SP
3177 int var_a = CHREC_VARIABLE (access_fn_a);
3178 int var_b = CHREC_VARIABLE (access_fn_b);
ba42e045 3179
a130584a
SP
3180 if (var_a != var_b
3181 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
86df10e3
SP
3182 {
3183 non_affine_dependence_relation (ddr);
ba42e045 3184 return false;
86df10e3 3185 }
b8698a0f 3186
6b6fa4e9 3187 dist = int_cst_value (SUB_DISTANCE (subscript));
a130584a
SP
3188 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3189 *index_carry = MIN (index, *index_carry);
56cf8686 3190
ba42e045
SP
3191 /* This is the subscript coupling test. If we have already
3192 recorded a distance for this loop (a distance coming from
3193 another subscript), it should be the same. For example,
3194 in the following code, there is no dependence:
3195
56cf8686
SP
3196 | loop i = 0, N, 1
3197 | T[i+1][i] = ...
3198 | ... = T[i][i]
3199 | endloop
ba42e045
SP
3200 */
3201 if (init_v[index] != 0 && dist_v[index] != dist)
56cf8686 3202 {
36d59cf7 3203 finalize_ddr_dependent (ddr, chrec_known);
ba42e045 3204 return false;
56cf8686
SP
3205 }
3206
ba42e045
SP
3207 dist_v[index] = dist;
3208 init_v[index] = 1;
3209 *init_b = true;
3210 }
a50411de 3211 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
ba42e045
SP
3212 {
3213 /* This can be for example an affine vs. constant dependence
3214 (T[i] vs. T[3]) that is not an affine dependence and is
3215 not representable as a distance vector. */
3216 non_affine_dependence_relation (ddr);
3217 return false;
56cf8686
SP
3218 }
3219 }
304afda6 3220
ba42e045
SP
3221 return true;
3222}
304afda6 3223
1baf2906
SP
3224/* Return true when the DDR contains only constant access functions. */
3225
3226static bool
ed7a4b4b 3227constant_access_functions (const struct data_dependence_relation *ddr)
1baf2906
SP
3228{
3229 unsigned i;
3230
3231 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3232 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3233 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3234 return false;
3235
3236 return true;
3237}
3238
ba42e045 3239/* Helper function for the case where DDR_A and DDR_B are the same
097392de
SP
3240 multivariate access function with a constant step. For an example
3241 see pr34635-1.c. */
86df10e3 3242
ba42e045
SP
3243static void
3244add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3245{
3246 int x_1, x_2;
3247 tree c_1 = CHREC_LEFT (c_2);
3248 tree c_0 = CHREC_LEFT (c_1);
3249 lambda_vector dist_v;
0ca2faee 3250 int v1, v2, cd;
86df10e3 3251
b1e75954
SP
3252 /* Polynomials with more than 2 variables are not handled yet. When
3253 the evolution steps are parameters, it is not possible to
3254 represent the dependence using classical distance vectors. */
3255 if (TREE_CODE (c_0) != INTEGER_CST
3256 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3257 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3258 {
3259 DDR_AFFINE_P (ddr) = false;
ba42e045
SP
3260 return;
3261 }
304afda6 3262
ba42e045
SP
3263 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3264 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
304afda6 3265
ba42e045
SP
3266 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3267 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
6b6fa4e9
SP
3268 v1 = int_cst_value (CHREC_RIGHT (c_1));
3269 v2 = int_cst_value (CHREC_RIGHT (c_2));
0ca2faee
ZD
3270 cd = gcd (v1, v2);
3271 v1 /= cd;
3272 v2 /= cd;
3273
3274 if (v2 < 0)
3275 {
3276 v2 = -v2;
3277 v1 = -v1;
3278 }
3279
3280 dist_v[x_1] = v2;
3281 dist_v[x_2] = -v1;
ba42e045 3282 save_dist_v (ddr, dist_v);
304afda6 3283
ba42e045
SP
3284 add_outer_distances (ddr, dist_v, x_1);
3285}
304afda6 3286
ba42e045
SP
3287/* Helper function for the case where DDR_A and DDR_B are the same
3288 access functions. */
37b8a73b 3289
ba42e045
SP
3290static void
3291add_other_self_distances (struct data_dependence_relation *ddr)
3292{
3293 lambda_vector dist_v;
3294 unsigned i;
3295 int index_carry = DDR_NB_LOOPS (ddr);
304afda6 3296
ba42e045 3297 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
37b8a73b 3298 {
ba42e045 3299 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
304afda6 3300
ba42e045 3301 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
304afda6 3302 {
ba42e045
SP
3303 if (!evolution_function_is_univariate_p (access_fun))
3304 {
3305 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3306 {
3307 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3308 return;
3309 }
3310
097392de
SP
3311 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3312
3313 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3314 add_multivariate_self_dist (ddr, access_fun);
3315 else
3316 /* The evolution step is not constant: it varies in
3317 the outer loop, so this cannot be represented by a
3318 distance vector. For example in pr34635.c the
3319 evolution is {0, +, {0, +, 4}_1}_2. */
3320 DDR_AFFINE_P (ddr) = false;
3321
ba42e045
SP
3322 return;
3323 }
3324
3325 index_carry = MIN (index_carry,
3326 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3327 DDR_LOOP_NEST (ddr)));
304afda6 3328 }
37b8a73b
SP
3329 }
3330
ba42e045
SP
3331 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3332 add_outer_distances (ddr, dist_v, index_carry);
56cf8686
SP
3333}
3334
1baf2906
SP
3335static void
3336insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3337{
3338 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3339
3340 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3341 save_dist_v (ddr, dist_v);
3342}
3343
3344/* Adds a unit distance vector to DDR when there is a 0 overlap. This
3345 is the case for example when access functions are the same and
3346 equal to a constant, as in:
3347
3348 | loop_1
3349 | A[3] = ...
3350 | ... = A[3]
3351 | endloop_1
3352
3353 in which case the distance vectors are (0) and (1). */
3354
3355static void
3356add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3357{
3358 unsigned i, j;
3359
3360 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3361 {
3362 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3363 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3364 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3365
3366 for (j = 0; j < ca->n; j++)
3367 if (affine_function_zero_p (ca->fns[j]))
3368 {
3369 insert_innermost_unit_dist_vector (ddr);
3370 return;
3371 }
3372
3373 for (j = 0; j < cb->n; j++)
3374 if (affine_function_zero_p (cb->fns[j]))
3375 {
3376 insert_innermost_unit_dist_vector (ddr);
3377 return;
3378 }
3379 }
3380}
3381
ba42e045
SP
3382/* Compute the classic per loop distance vector. DDR is the data
3383 dependence relation to build a vector from. Return false when fail
3384 to represent the data dependence as a distance vector. */
56cf8686 3385
464f49d8 3386static bool
da9a21f4
SP
3387build_classic_dist_vector (struct data_dependence_relation *ddr,
3388 struct loop *loop_nest)
56cf8686 3389{
304afda6 3390 bool init_b = false;
ba42e045
SP
3391 int index_carry = DDR_NB_LOOPS (ddr);
3392 lambda_vector dist_v;
304afda6 3393
36d59cf7 3394 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2f470326 3395 return false;
ba42e045
SP
3396
3397 if (same_access_functions (ddr))
56cf8686 3398 {
ba42e045
SP
3399 /* Save the 0 vector. */
3400 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3401 save_dist_v (ddr, dist_v);
56cf8686 3402
1baf2906
SP
3403 if (constant_access_functions (ddr))
3404 add_distance_for_zero_overlaps (ddr);
3405
ba42e045
SP
3406 if (DDR_NB_LOOPS (ddr) > 1)
3407 add_other_self_distances (ddr);
86df10e3 3408
ba42e045
SP
3409 return true;
3410 }
86df10e3 3411
ba42e045
SP
3412 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3413 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3414 dist_v, &init_b, &index_carry))
3415 return false;
86df10e3 3416
ba42e045
SP
3417 /* Save the distance vector if we initialized one. */
3418 if (init_b)
3419 {
3420 /* Verify a basic constraint: classic distance vectors should
3421 always be lexicographically positive.
3422
3423 Data references are collected in the order of execution of
3424 the program, thus for the following loop
3425
3426 | for (i = 1; i < 100; i++)
3427 | for (j = 1; j < 100; j++)
3428 | {
3429 | t = T[j+1][i-1]; // A
3430 | T[j][i] = t + 2; // B
3431 | }
3432
3433 references are collected following the direction of the wind:
3434 A then B. The data dependence tests are performed also
3435 following this order, such that we're looking at the distance
3436 separating the elements accessed by A from the elements later
3437 accessed by B. But in this example, the distance returned by
3438 test_dep (A, B) is lexicographically negative (-1, 1), that
3439 means that the access A occurs later than B with respect to
3440 the outer loop, ie. we're actually looking upwind. In this
3441 case we solve test_dep (B, A) looking downwind to the
3442 lexicographically positive solution, that returns the
3443 distance vector (1, -1). */
3444 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3445 {
3446 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2f470326
JJ
3447 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3448 loop_nest))
3449 return false;
ba42e045 3450 compute_subscript_distance (ddr);
2f470326
JJ
3451 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3452 save_v, &init_b, &index_carry))
3453 return false;
ba42e045 3454 save_dist_v (ddr, save_v);
71d5b5e1 3455 DDR_REVERSED_P (ddr) = true;
ba42e045
SP
3456
3457 /* In this case there is a dependence forward for all the
3458 outer loops:
3459
3460 | for (k = 1; k < 100; k++)
3461 | for (i = 1; i < 100; i++)
3462 | for (j = 1; j < 100; j++)
3463 | {
3464 | t = T[j+1][i-1]; // A
3465 | T[j][i] = t + 2; // B
3466 | }
3467
b8698a0f 3468 the vectors are:
ba42e045
SP
3469 (0, 1, -1)
3470 (1, 1, -1)
3471 (1, -1, 1)
3472 */
3473 if (DDR_NB_LOOPS (ddr) > 1)
3474 {
3475 add_outer_distances (ddr, save_v, index_carry);
3476 add_outer_distances (ddr, dist_v, index_carry);
86df10e3 3477 }
ba42e045
SP
3478 }
3479 else
3480 {
3481 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3482 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
86df10e3 3483
ba42e045 3484 if (DDR_NB_LOOPS (ddr) > 1)
56cf8686 3485 {
ba42e045 3486 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
86df10e3 3487
2f470326
JJ
3488 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3489 DDR_A (ddr), loop_nest))
3490 return false;
ba42e045 3491 compute_subscript_distance (ddr);
2f470326
JJ
3492 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3493 opposite_v, &init_b,
3494 &index_carry))
3495 return false;
86df10e3 3496
2f470326 3497 save_dist_v (ddr, save_v);
ba42e045
SP
3498 add_outer_distances (ddr, dist_v, index_carry);
3499 add_outer_distances (ddr, opposite_v, index_carry);
56cf8686 3500 }
2f470326
JJ
3501 else
3502 save_dist_v (ddr, save_v);
56cf8686
SP
3503 }
3504 }
ba42e045
SP
3505 else
3506 {
3507 /* There is a distance of 1 on all the outer loops: Example:
3508 there is a dependence of distance 1 on loop_1 for the array A.
304afda6 3509
ba42e045
SP
3510 | loop_1
3511 | A[5] = ...
3512 | endloop
3513 */
3514 add_outer_distances (ddr, dist_v,
3515 lambda_vector_first_nz (dist_v,
3516 DDR_NB_LOOPS (ddr), 0));
3517 }
3518
3519 if (dump_file && (dump_flags & TDF_DETAILS))
304afda6 3520 {
ba42e045 3521 unsigned i;
304afda6 3522
ba42e045
SP
3523 fprintf (dump_file, "(build_classic_dist_vector\n");
3524 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3525 {
3526 fprintf (dump_file, " dist_vector = (");
3527 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3528 DDR_NB_LOOPS (ddr));
3529 fprintf (dump_file, " )\n");
3530 }
3531 fprintf (dump_file, ")\n");
304afda6
SP
3532 }
3533
ba42e045
SP
3534 return true;
3535}
56cf8686 3536
ba42e045
SP
3537/* Return the direction for a given distance.
3538 FIXME: Computing dir this way is suboptimal, since dir can catch
3539 cases that dist is unable to represent. */
86df10e3 3540
ba42e045
SP
3541static inline enum data_dependence_direction
3542dir_from_dist (int dist)
3543{
3544 if (dist > 0)
3545 return dir_positive;
3546 else if (dist < 0)
3547 return dir_negative;
3548 else
3549 return dir_equal;
3550}
304afda6 3551
ba42e045
SP
3552/* Compute the classic per loop direction vector. DDR is the data
3553 dependence relation to build a vector from. */
304afda6 3554
ba42e045
SP
3555static void
3556build_classic_dir_vector (struct data_dependence_relation *ddr)
3557{
3558 unsigned i, j;
3559 lambda_vector dist_v;
86df10e3 3560
9771b263 3561 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
ba42e045
SP
3562 {
3563 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
86df10e3 3564
ba42e045
SP
3565 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3566 dir_v[j] = dir_from_dist (dist_v[j]);
304afda6 3567
ba42e045
SP
3568 save_dir_v (ddr, dir_v);
3569 }
56cf8686
SP
3570}
3571
ba42e045
SP
3572/* Helper function. Returns true when there is a dependence between
3573 data references DRA and DRB. */
0ff4040e 3574
ba42e045
SP
3575static bool
3576subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3577 struct data_reference *dra,
da9a21f4
SP
3578 struct data_reference *drb,
3579 struct loop *loop_nest)
0ff4040e
SP
3580{
3581 unsigned int i;
0ff4040e 3582 tree last_conflicts;
ebf78a47 3583 struct subscript *subscript;
9b00587c 3584 tree res = NULL_TREE;
ba42e045 3585
9771b263 3586 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
0ff4040e 3587 {
d93817c4 3588 conflict_function *overlaps_a, *overlaps_b;
ebf78a47 3589
b8698a0f 3590 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
0ff4040e 3591 DR_ACCESS_FN (drb, i),
b8698a0f 3592 &overlaps_a, &overlaps_b,
da9a21f4 3593 &last_conflicts, loop_nest);
ebf78a47 3594
9b00587c
RG
3595 if (SUB_CONFLICTS_IN_A (subscript))
3596 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3597 if (SUB_CONFLICTS_IN_B (subscript))
3598 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3599
3600 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3601 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3602 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3603
3604 /* If there is any undetermined conflict function we have to
3605 give a conservative answer in case we cannot prove that
3606 no dependence exists when analyzing another subscript. */
d93817c4
ZD
3607 if (CF_NOT_KNOWN_P (overlaps_a)
3608 || CF_NOT_KNOWN_P (overlaps_b))
0ff4040e 3609 {
9b00587c
RG
3610 res = chrec_dont_know;
3611 continue;
0ff4040e 3612 }
ebf78a47 3613
9b00587c 3614 /* When there is a subscript with no dependence we can stop. */
d93817c4
ZD
3615 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3616 || CF_NO_DEPENDENCE_P (overlaps_b))
0ff4040e 3617 {
9b00587c
RG
3618 res = chrec_known;
3619 break;
0ff4040e
SP
3620 }
3621 }
3622
9b00587c
RG
3623 if (res == NULL_TREE)
3624 return true;
3625
3626 if (res == chrec_known)
3627 dependence_stats.num_dependence_independent++;
3628 else
3629 dependence_stats.num_dependence_undetermined++;
3630 finalize_ddr_dependent (ddr, res);
3631 return false;
ba42e045
SP
3632}
3633
da9a21f4 3634/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
ba42e045
SP
3635
3636static void
da9a21f4
SP
3637subscript_dependence_tester (struct data_dependence_relation *ddr,
3638 struct loop *loop_nest)
ba42e045 3639{
da9a21f4 3640 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
ba42e045 3641 dependence_stats.num_dependence_dependent++;
0ff4040e 3642
0ff4040e 3643 compute_subscript_distance (ddr);
da9a21f4 3644 if (build_classic_dist_vector (ddr, loop_nest))
ba42e045 3645 build_classic_dir_vector (ddr);
0ff4040e
SP
3646}
3647
56cf8686 3648/* Returns true when all the access functions of A are affine or
da9a21f4 3649 constant with respect to LOOP_NEST. */
56cf8686 3650
b8698a0f 3651static bool
ed7a4b4b
KG
3652access_functions_are_affine_or_constant_p (const struct data_reference *a,
3653 const struct loop *loop_nest)
56cf8686
SP
3654{
3655 unsigned int i;
9771b263 3656 vec<tree> fns = DR_ACCESS_FNS (a);
9cbb7989 3657 tree t;
3d8864c0 3658
9771b263 3659 FOR_EACH_VEC_ELT (fns, i, t)
da9a21f4
SP
3660 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3661 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
56cf8686 3662 return false;
b8698a0f 3663
56cf8686
SP
3664 return true;
3665}
3666
3d8864c0
SP
3667/* Initializes an equation for an OMEGA problem using the information
3668 contained in the ACCESS_FUN. Returns true when the operation
3669 succeeded.
3670
3671 PB is the omega constraint system.
3672 EQ is the number of the equation to be initialized.
3673 OFFSET is used for shifting the variables names in the constraints:
3674 a constrain is composed of 2 * the number of variables surrounding
3675 dependence accesses. OFFSET is set either to 0 for the first n variables,
3676 then it is set to n.
3677 ACCESS_FUN is expected to be an affine chrec. */
3678
3679static bool
b8698a0f
L
3680init_omega_eq_with_af (omega_pb pb, unsigned eq,
3681 unsigned int offset, tree access_fun,
3d8864c0
SP
3682 struct data_dependence_relation *ddr)
3683{
3684 switch (TREE_CODE (access_fun))
3685 {
3686 case POLYNOMIAL_CHREC:
3687 {
3688 tree left = CHREC_LEFT (access_fun);
3689 tree right = CHREC_RIGHT (access_fun);
3690 int var = CHREC_VARIABLE (access_fun);
3691 unsigned var_idx;
3692
3693 if (TREE_CODE (right) != INTEGER_CST)
3694 return false;
3695
3696 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
6b6fa4e9 3697 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3d8864c0
SP
3698
3699 /* Compute the innermost loop index. */
3700 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3701
3702 if (offset == 0)
b8698a0f 3703 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
6b6fa4e9 3704 += int_cst_value (right);
3d8864c0
SP
3705
3706 switch (TREE_CODE (left))
3707 {
3708 case POLYNOMIAL_CHREC:
3709 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3710
3711 case INTEGER_CST:
6b6fa4e9 3712 pb->eqs[eq].coef[0] += int_cst_value (left);
3d8864c0
SP
3713 return true;
3714
3715 default:
3716 return false;
3717 }
3718 }
3719
3720 case INTEGER_CST:
6b6fa4e9 3721 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3d8864c0
SP
3722 return true;
3723
3724 default:
3725 return false;
3726 }
3727}
3728
3729/* As explained in the comments preceding init_omega_for_ddr, we have
3730 to set up a system for each loop level, setting outer loops
3731 variation to zero, and current loop variation to positive or zero.
3732 Save each lexico positive distance vector. */
3733
3734static void
3735omega_extract_distance_vectors (omega_pb pb,
3736 struct data_dependence_relation *ddr)
3737{
3738 int eq, geq;
3739 unsigned i, j;
3740 struct loop *loopi, *loopj;
3741 enum omega_result res;
3742
3743 /* Set a new problem for each loop in the nest. The basis is the
3744 problem that we have initialized until now. On top of this we
3745 add new constraints. */
b8698a0f 3746 for (i = 0; i <= DDR_INNER_LOOP (ddr)
9771b263 3747 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3d8864c0
SP
3748 {
3749 int dist = 0;
3750 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3751 DDR_NB_LOOPS (ddr));
3752
3753 omega_copy_problem (copy, pb);
3754
3755 /* For all the outer loops "loop_j", add "dj = 0". */
9771b263 3756 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3d8864c0
SP
3757 {
3758 eq = omega_add_zero_eq (copy, omega_black);
3759 copy->eqs[eq].coef[j + 1] = 1;
3760 }
3761
3762 /* For "loop_i", add "0 <= di". */
3763 geq = omega_add_zero_geq (copy, omega_black);
3764 copy->geqs[geq].coef[i + 1] = 1;
3765
3766 /* Reduce the constraint system, and test that the current
3767 problem is feasible. */
3768 res = omega_simplify_problem (copy);
b8698a0f 3769 if (res == omega_false
3d8864c0
SP
3770 || res == omega_unknown
3771 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3772 goto next_problem;
3773
3774 for (eq = 0; eq < copy->num_subs; eq++)
3775 if (copy->subs[eq].key == (int) i + 1)
3776 {
3777 dist = copy->subs[eq].coef[0];
3778 goto found_dist;
3779 }
3780
3781 if (dist == 0)
3782 {
3783 /* Reinitialize problem... */
3784 omega_copy_problem (copy, pb);
9771b263 3785 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3d8864c0
SP
3786 {
3787 eq = omega_add_zero_eq (copy, omega_black);
3788 copy->eqs[eq].coef[j + 1] = 1;
3789 }
3790
3791 /* ..., but this time "di = 1". */
3792 eq = omega_add_zero_eq (copy, omega_black);
3793 copy->eqs[eq].coef[i + 1] = 1;
3794 copy->eqs[eq].coef[0] = -1;
3795
3796 res = omega_simplify_problem (copy);
b8698a0f 3797 if (res == omega_false
3d8864c0
SP
3798 || res == omega_unknown
3799 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3800 goto next_problem;
3801
3802 for (eq = 0; eq < copy->num_subs; eq++)
3803 if (copy->subs[eq].key == (int) i + 1)
3804 {
3805 dist = copy->subs[eq].coef[0];
3806 goto found_dist;
3807 }
3808 }
3809
3810 found_dist:;
3811 /* Save the lexicographically positive distance vector. */
3812 if (dist >= 0)
3813 {
3814 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3815 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3816
3817 dist_v[i] = dist;
3818
3819 for (eq = 0; eq < copy->num_subs; eq++)
3820 if (copy->subs[eq].key > 0)
3821 {
3822 dist = copy->subs[eq].coef[0];
3823 dist_v[copy->subs[eq].key - 1] = dist;
3824 }
3825
3826 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3827 dir_v[j] = dir_from_dist (dist_v[j]);
3828
3829 save_dist_v (ddr, dist_v);
3830 save_dir_v (ddr, dir_v);
3831 }
3832
3833 next_problem:;
3834 omega_free_problem (copy);
3835 }
3836}
3837
3838/* This is called for each subscript of a tuple of data references:
3839 insert an equality for representing the conflicts. */
3840
3841static bool
3842omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3843 struct data_dependence_relation *ddr,
3844 omega_pb pb, bool *maybe_dependent)
3845{
3846 int eq;
33b30201
SP
3847 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3848 TREE_TYPE (access_fun_b));
726a989a
RB
3849 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3850 tree fun_b = chrec_convert (type, access_fun_b, NULL);
33b30201 3851 tree difference = chrec_fold_minus (type, fun_a, fun_b);
5a1f5f9a 3852 tree minus_one;
3d8864c0
SP
3853
3854 /* When the fun_a - fun_b is not constant, the dependence is not
3855 captured by the classic distance vector representation. */
3856 if (TREE_CODE (difference) != INTEGER_CST)
3857 return false;
3858
3859 /* ZIV test. */
3860 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3861 {
3862 /* There is no dependence. */
3863 *maybe_dependent = false;
3864 return true;
3865 }
3866
5a1f5f9a
SP
3867 minus_one = build_int_cst (type, -1);
3868 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3d8864c0
SP
3869
3870 eq = omega_add_zero_eq (pb, omega_black);
3871 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3872 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3873 /* There is probably a dependence, but the system of
3874 constraints cannot be built: answer "don't know". */
3875 return false;
3876
3877 /* GCD test. */
3878 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
b8698a0f 3879 && !int_divides_p (lambda_vector_gcd
3d8864c0
SP
3880 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3881 2 * DDR_NB_LOOPS (ddr)),
3882 pb->eqs[eq].coef[0]))
3883 {
3884 /* There is no dependence. */
3885 *maybe_dependent = false;
3886 return true;
3887 }
3888
3889 return true;
3890}
3891
3892/* Helper function, same as init_omega_for_ddr but specialized for
3893 data references A and B. */
3894
3895static bool
3896init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3897 struct data_dependence_relation *ddr,
3898 omega_pb pb, bool *maybe_dependent)
3899{
3900 unsigned i;
3901 int ineq;
3902 struct loop *loopi;
3903 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3904
3905 /* Insert an equality per subscript. */
3906 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3907 {
3908 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3909 ddr, pb, maybe_dependent))
3910 return false;
3911 else if (*maybe_dependent == false)
3912 {
3913 /* There is no dependence. */
3914 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3915 return true;
3916 }
3917 }
3918
3919 /* Insert inequalities: constraints corresponding to the iteration
3920 domain, i.e. the loops surrounding the references "loop_x" and
3921 the distance variables "dx". The layout of the OMEGA
3922 representation is as follows:
3923 - coef[0] is the constant
3924 - coef[1..nb_loops] are the protected variables that will not be
3925 removed by the solver: the "dx"
3926 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3927 */
b8698a0f 3928 for (i = 0; i <= DDR_INNER_LOOP (ddr)
9771b263 3929 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3d8864c0 3930 {
652c4c71 3931 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3d8864c0
SP
3932
3933 /* 0 <= loop_x */
3934 ineq = omega_add_zero_geq (pb, omega_black);
3935 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3936
3937 /* 0 <= loop_x + dx */
3938 ineq = omega_add_zero_geq (pb, omega_black);
3939 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3940 pb->geqs[ineq].coef[i + 1] = 1;
3941
3942 if (nbi != -1)
3943 {
3944 /* loop_x <= nb_iters */
3945 ineq = omega_add_zero_geq (pb, omega_black);
3946 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3947 pb->geqs[ineq].coef[0] = nbi;
3948
3949 /* loop_x + dx <= nb_iters */
3950 ineq = omega_add_zero_geq (pb, omega_black);
3951 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3952 pb->geqs[ineq].coef[i + 1] = -1;
3953 pb->geqs[ineq].coef[0] = nbi;
3954
3955 /* A step "dx" bigger than nb_iters is not feasible, so
3956 add "0 <= nb_iters + dx", */
3957 ineq = omega_add_zero_geq (pb, omega_black);
3958 pb->geqs[ineq].coef[i + 1] = 1;
3959 pb->geqs[ineq].coef[0] = nbi;
3960 /* and "dx <= nb_iters". */
3961 ineq = omega_add_zero_geq (pb, omega_black);
3962 pb->geqs[ineq].coef[i + 1] = -1;
3963 pb->geqs[ineq].coef[0] = nbi;
3964 }
3965 }
3966
3967 omega_extract_distance_vectors (pb, ddr);
3968
3969 return true;
3970}
3971
3972/* Sets up the Omega dependence problem for the data dependence
3973 relation DDR. Returns false when the constraint system cannot be
3974 built, ie. when the test answers "don't know". Returns true
3975 otherwise, and when independence has been proved (using one of the
3976 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3977 set MAYBE_DEPENDENT to true.
3978
3979 Example: for setting up the dependence system corresponding to the
b8698a0f 3980 conflicting accesses
3d8864c0
SP
3981
3982 | loop_i
3983 | loop_j
3984 | A[i, i+1] = ...
3985 | ... A[2*j, 2*(i + j)]
3986 | endloop_j
3987 | endloop_i
b8698a0f 3988
3d8864c0
SP
3989 the following constraints come from the iteration domain:
3990
3991 0 <= i <= Ni
3992 0 <= i + di <= Ni
3993 0 <= j <= Nj
3994 0 <= j + dj <= Nj
3995
3996 where di, dj are the distance variables. The constraints
3997 representing the conflicting elements are:
3998
3999 i = 2 * (j + dj)
4000 i + 1 = 2 * (i + di + j + dj)
4001
4002 For asking that the resulting distance vector (di, dj) be
4003 lexicographically positive, we insert the constraint "di >= 0". If
4004 "di = 0" in the solution, we fix that component to zero, and we
4005 look at the inner loops: we set a new problem where all the outer
4006 loop distances are zero, and fix this inner component to be
4007 positive. When one of the components is positive, we save that
4008 distance, and set a new problem where the distance on this loop is
4009 zero, searching for other distances in the inner loops. Here is
4010 the classic example that illustrates that we have to set for each
4011 inner loop a new problem:
4012
4013 | loop_1
4014 | loop_2
4015 | A[10]
4016 | endloop_2
4017 | endloop_1
4018
4019 we have to save two distances (1, 0) and (0, 1).
4020
4021 Given two array references, refA and refB, we have to set the
4022 dependence problem twice, refA vs. refB and refB vs. refA, and we
4023 cannot do a single test, as refB might occur before refA in the
4024 inner loops, and the contrary when considering outer loops: ex.
4025
4026 | loop_0
4027 | loop_1
4028 | loop_2
4029 | T[{1,+,1}_2][{1,+,1}_1] // refA
4030 | T[{2,+,1}_2][{0,+,1}_1] // refB
4031 | endloop_2
4032 | endloop_1
4033 | endloop_0
4034
4035 refB touches the elements in T before refA, and thus for the same
4036 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4037 but for successive loop_0 iterations, we have (1, -1, 1)
4038
4039 The Omega solver expects the distance variables ("di" in the
4040 previous example) to come first in the constraint system (as
4041 variables to be protected, or "safe" variables), the constraint
4042 system is built using the following layout:
4043
4044 "cst | distance vars | index vars".
4045*/
4046
4047static bool
4048init_omega_for_ddr (struct data_dependence_relation *ddr,
4049 bool *maybe_dependent)
4050{
4051 omega_pb pb;
4052 bool res = false;
4053
4054 *maybe_dependent = true;
4055
4056 if (same_access_functions (ddr))
4057 {
4058 unsigned j;
4059 lambda_vector dir_v;
4060
4061 /* Save the 0 vector. */
4062 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4063 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4064 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4065 dir_v[j] = dir_equal;
4066 save_dir_v (ddr, dir_v);
4067
4068 /* Save the dependences carried by outer loops. */
4069 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4070 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4071 maybe_dependent);
4072 omega_free_problem (pb);
4073 return res;
4074 }
4075
4076 /* Omega expects the protected variables (those that have to be kept
4077 after elimination) to appear first in the constraint system.
4078 These variables are the distance variables. In the following
4079 initialization we declare NB_LOOPS safe variables, and the total
4080 number of variables for the constraint system is 2*NB_LOOPS. */
4081 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4082 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4083 maybe_dependent);
4084 omega_free_problem (pb);
4085
4086 /* Stop computation if not decidable, or no dependence. */
4087 if (res == false || *maybe_dependent == false)
4088 return res;
4089
4090 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4091 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4092 maybe_dependent);
4093 omega_free_problem (pb);
4094
4095 return res;
4096}
4097
4098/* Return true when DDR contains the same information as that stored
4099 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4100
4101static bool
4102ddr_consistent_p (FILE *file,
4103 struct data_dependence_relation *ddr,
9771b263
DN
4104 vec<lambda_vector> dist_vects,
4105 vec<lambda_vector> dir_vects)
3d8864c0
SP
4106{
4107 unsigned int i, j;
4108
4109 /* If dump_file is set, output there. */
4110 if (dump_file && (dump_flags & TDF_DETAILS))
4111 file = dump_file;
4112
9771b263 4113 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
3d8864c0
SP
4114 {
4115 lambda_vector b_dist_v;
4116 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
9771b263 4117 dist_vects.length (),
3d8864c0
SP
4118 DDR_NUM_DIST_VECTS (ddr));
4119
4120 fprintf (file, "Banerjee dist vectors:\n");
9771b263 4121 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
3d8864c0
SP
4122 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4123
4124 fprintf (file, "Omega dist vectors:\n");
4125 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4126 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4127
4128 fprintf (file, "data dependence relation:\n");
4129 dump_data_dependence_relation (file, ddr);
4130
4131 fprintf (file, ")\n");
4132 return false;
4133 }
4134
9771b263 4135 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
3d8864c0
SP
4136 {
4137 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
9771b263 4138 dir_vects.length (),
3d8864c0
SP
4139 DDR_NUM_DIR_VECTS (ddr));
4140 return false;
4141 }
4142
4143 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4144 {
4145 lambda_vector a_dist_v;
4146 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4147
4148 /* Distance vectors are not ordered in the same way in the DDR
4149 and in the DIST_VECTS: search for a matching vector. */
9771b263 4150 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
3d8864c0
SP
4151 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4152 break;
4153
9771b263 4154 if (j == dist_vects.length ())
3d8864c0
SP
4155 {
4156 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4157 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4158 fprintf (file, "not found in Omega dist vectors:\n");
4159 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4160 fprintf (file, "data dependence relation:\n");
4161 dump_data_dependence_relation (file, ddr);
4162 fprintf (file, ")\n");
4163 }
4164 }
4165
4166 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4167 {
4168 lambda_vector a_dir_v;
4169 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4170
4171 /* Direction vectors are not ordered in the same way in the DDR
4172 and in the DIR_VECTS: search for a matching vector. */
9771b263 4173 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
3d8864c0
SP
4174 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4175 break;
4176
9771b263 4177 if (j == dist_vects.length ())
3d8864c0
SP
4178 {
4179 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4180 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4181 fprintf (file, "not found in Omega dir vectors:\n");
4182 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4183 fprintf (file, "data dependence relation:\n");
4184 dump_data_dependence_relation (file, ddr);
4185 fprintf (file, ")\n");
4186 }
4187 }
4188
b8698a0f 4189 return true;
3d8864c0
SP
4190}
4191
da9a21f4
SP
4192/* This computes the affine dependence relation between A and B with
4193 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4194 independence between two accesses, while CHREC_DONT_KNOW is used
4195 for representing the unknown relation.
b8698a0f 4196
56cf8686
SP
4197 Note that it is possible to stop the computation of the dependence
4198 relation the first time we detect a CHREC_KNOWN element for a given
4199 subscript. */
4200
f20132e7 4201void
da9a21f4
SP
4202compute_affine_dependence (struct data_dependence_relation *ddr,
4203 struct loop *loop_nest)
56cf8686
SP
4204{
4205 struct data_reference *dra = DDR_A (ddr);
4206 struct data_reference *drb = DDR_B (ddr);
b8698a0f 4207
56cf8686
SP
4208 if (dump_file && (dump_flags & TDF_DETAILS))
4209 {
36d59cf7 4210 fprintf (dump_file, "(compute_affine_dependence\n");
c4ddde1b
RG
4211 fprintf (dump_file, " stmt_a: ");
4212 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4213 fprintf (dump_file, " stmt_b: ");
4214 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
56cf8686 4215 }
0ff4040e 4216
56cf8686 4217 /* Analyze only when the dependence relation is not yet known. */
1a4571cb 4218 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
56cf8686 4219 {
0ff4040e
SP
4220 dependence_stats.num_dependence_tests++;
4221
da9a21f4
SP
4222 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4223 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3d8864c0 4224 {
bcf1ef00
RB
4225 subscript_dependence_tester (ddr, loop_nest);
4226
3d8864c0
SP
4227 if (flag_check_data_deps)
4228 {
bcf1ef00 4229 /* Dump the dependences from the first algorithm. */
3d8864c0
SP
4230 if (dump_file && (dump_flags & TDF_DETAILS))
4231 {
4232 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4233 dump_data_dependence_relation (dump_file, ddr);
4234 }
4235
4236 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4237 {
4238 bool maybe_dependent;
9771b263 4239 vec<lambda_vector> dir_vects, dist_vects;
3d8864c0
SP
4240
4241 /* Save the result of the first DD analyzer. */
4242 dist_vects = DDR_DIST_VECTS (ddr);
4243 dir_vects = DDR_DIR_VECTS (ddr);
4244
4245 /* Reset the information. */
9771b263
DN
4246 DDR_DIST_VECTS (ddr).create (0);
4247 DDR_DIR_VECTS (ddr).create (0);
3d8864c0
SP
4248
4249 /* Compute the same information using Omega. */
4250 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4251 goto csys_dont_know;
4252
4253 if (dump_file && (dump_flags & TDF_DETAILS))
4254 {
4255 fprintf (dump_file, "Omega Analyzer\n");
4256 dump_data_dependence_relation (dump_file, ddr);
4257 }
4258
4259 /* Check that we get the same information. */
4260 if (maybe_dependent)
4261 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4262 dir_vects));
4263 }
4264 }
3d8864c0 4265 }
b8698a0f 4266
56cf8686
SP
4267 /* As a last case, if the dependence cannot be determined, or if
4268 the dependence is considered too difficult to determine, answer
4269 "don't know". */
4270 else
0ff4040e 4271 {
3d8864c0 4272 csys_dont_know:;
0ff4040e
SP
4273 dependence_stats.num_dependence_undetermined++;
4274
4275 if (dump_file && (dump_flags & TDF_DETAILS))
4276 {
4277 fprintf (dump_file, "Data ref a:\n");
4278 dump_data_reference (dump_file, dra);
4279 fprintf (dump_file, "Data ref b:\n");
4280 dump_data_reference (dump_file, drb);
4281 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4282 }
4283 finalize_ddr_dependent (ddr, chrec_dont_know);
4284 }
56cf8686 4285 }
b8698a0f 4286
56cf8686 4287 if (dump_file && (dump_flags & TDF_DETAILS))
c4ddde1b
RG
4288 {
4289 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4290 fprintf (dump_file, ") -> no dependence\n");
4291 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4292 fprintf (dump_file, ") -> dependence analysis failed\n");
4293 else
4294 fprintf (dump_file, ")\n");
4295 }
56cf8686
SP
4296}
4297
ba42e045
SP
4298/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4299 the data references in DATAREFS, in the LOOP_NEST. When
ebf78a47 4300 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3881dee9
AB
4301 relations. Return true when successful, i.e. data references number
4302 is small enough to be handled. */
56cf8686 4303
3881dee9 4304bool
9771b263
DN
4305compute_all_dependences (vec<data_reference_p> datarefs,
4306 vec<ddr_p> *dependence_relations,
4307 vec<loop_p> loop_nest,
ebf78a47 4308 bool compute_self_and_rr)
56cf8686 4309{
ebf78a47
SP
4310 struct data_dependence_relation *ddr;
4311 struct data_reference *a, *b;
4312 unsigned int i, j;
56cf8686 4313
9771b263 4314 if ((int) datarefs.length ()
3881dee9
AB
4315 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4316 {
4317 struct data_dependence_relation *ddr;
4318
4319 /* Insert a single relation into dependence_relations:
4320 chrec_dont_know. */
4321 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
9771b263 4322 dependence_relations->safe_push (ddr);
3881dee9
AB
4323 return false;
4324 }
4325
9771b263
DN
4326 FOR_EACH_VEC_ELT (datarefs, i, a)
4327 for (j = i + 1; datarefs.iterate (j, &b); j++)
b0af49c4 4328 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
ebf78a47
SP
4329 {
4330 ddr = initialize_data_dependence_relation (a, b, loop_nest);
9771b263
DN
4331 dependence_relations->safe_push (ddr);
4332 if (loop_nest.exists ())
4333 compute_affine_dependence (ddr, loop_nest[0]);
ebf78a47 4334 }
789246d7 4335
ebf78a47 4336 if (compute_self_and_rr)
9771b263 4337 FOR_EACH_VEC_ELT (datarefs, i, a)
56cf8686 4338 {
ebf78a47 4339 ddr = initialize_data_dependence_relation (a, a, loop_nest);
9771b263
DN
4340 dependence_relations->safe_push (ddr);
4341 if (loop_nest.exists ())
4342 compute_affine_dependence (ddr, loop_nest[0]);
56cf8686 4343 }
3881dee9
AB
4344
4345 return true;
56cf8686
SP
4346}
4347
aeb83f09
RG
4348/* Describes a location of a memory reference. */
4349
4350typedef struct data_ref_loc_d
4351{
a3698dfc
JJ
4352 /* The memory reference. */
4353 tree ref;
aeb83f09 4354
fcac74a1
RB
4355 /* True if the memory reference is read. */
4356 bool is_read;
aeb83f09
RG
4357} data_ref_loc;
4358
aeb83f09 4359
946e1bc7
ZD
4360/* Stores the locations of memory references in STMT to REFERENCES. Returns
4361 true if STMT clobbers memory, false otherwise. */
4362
aeb83f09 4363static bool
ff4c81cc 4364get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
946e1bc7
ZD
4365{
4366 bool clobbers_memory = false;
f32682ca 4367 data_ref_loc ref;
a3698dfc 4368 tree op0, op1;
726a989a 4369 enum gimple_code stmt_code = gimple_code (stmt);
946e1bc7 4370
946e1bc7 4371 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
f2ea3c15
RG
4372 As we cannot model data-references to not spelled out
4373 accesses give up if they may occur. */
74bf76ed
JJ
4374 if (stmt_code == GIMPLE_CALL
4375 && !(gimple_call_flags (stmt) & ECF_CONST))
4376 {
4377 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5ce9450f
JJ
4378 if (gimple_call_internal_p (stmt))
4379 switch (gimple_call_internal_fn (stmt))
4380 {
4381 case IFN_GOMP_SIMD_LANE:
4382 {
4383 struct loop *loop = gimple_bb (stmt)->loop_father;
4384 tree uid = gimple_call_arg (stmt, 0);
4385 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4386 if (loop == NULL
4387 || loop->simduid != SSA_NAME_VAR (uid))
4388 clobbers_memory = true;
4389 break;
4390 }
4391 case IFN_MASK_LOAD:
4392 case IFN_MASK_STORE:
4393 break;
4394 default:
74bf76ed 4395 clobbers_memory = true;
5ce9450f
JJ
4396 break;
4397 }
74bf76ed
JJ
4398 else
4399 clobbers_memory = true;
4400 }
4401 else if (stmt_code == GIMPLE_ASM
538dd0b7
DM
4402 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4403 || gimple_vuse (stmt)))
946e1bc7
ZD
4404 clobbers_memory = true;
4405
5006671f 4406 if (!gimple_vuse (stmt))
946e1bc7
ZD
4407 return clobbers_memory;
4408
726a989a 4409 if (stmt_code == GIMPLE_ASSIGN)
946e1bc7 4410 {
c8ae0bec 4411 tree base;
a3698dfc
JJ
4412 op0 = gimple_assign_lhs (stmt);
4413 op1 = gimple_assign_rhs1 (stmt);
b8698a0f 4414
a3698dfc
JJ
4415 if (DECL_P (op1)
4416 || (REFERENCE_CLASS_P (op1)
4417 && (base = get_base_address (op1))
c8ae0bec 4418 && TREE_CODE (base) != SSA_NAME))
946e1bc7 4419 {
a3698dfc 4420 ref.ref = op1;
f32682ca 4421 ref.is_read = true;
9771b263 4422 references->safe_push (ref);
946e1bc7 4423 }
946e1bc7 4424 }
726a989a 4425 else if (stmt_code == GIMPLE_CALL)
946e1bc7 4426 {
1b14d815 4427 unsigned i, n;
ac84e05e 4428
5ce9450f
JJ
4429 ref.is_read = false;
4430 if (gimple_call_internal_p (stmt))
4431 switch (gimple_call_internal_fn (stmt))
4432 {
4433 case IFN_MASK_LOAD:
8e91d222
JJ
4434 if (gimple_call_lhs (stmt) == NULL_TREE)
4435 break;
5ce9450f
JJ
4436 ref.is_read = true;
4437 case IFN_MASK_STORE:
4438 ref.ref = fold_build2 (MEM_REF,
4439 ref.is_read
4440 ? TREE_TYPE (gimple_call_lhs (stmt))
4441 : TREE_TYPE (gimple_call_arg (stmt, 3)),
4442 gimple_call_arg (stmt, 0),
4443 gimple_call_arg (stmt, 1));
4444 references->safe_push (ref);
4445 return false;
4446 default:
4447 break;
4448 }
4449
a3698dfc 4450 op0 = gimple_call_lhs (stmt);
1b14d815 4451 n = gimple_call_num_args (stmt);
ac84e05e 4452 for (i = 0; i < n; i++)
946e1bc7 4453 {
a3698dfc 4454 op1 = gimple_call_arg (stmt, i);
ac84e05e 4455
a3698dfc
JJ
4456 if (DECL_P (op1)
4457 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
946e1bc7 4458 {
a3698dfc 4459 ref.ref = op1;
f32682ca 4460 ref.is_read = true;
9771b263 4461 references->safe_push (ref);
946e1bc7
ZD
4462 }
4463 }
4464 }
1b14d815
RS
4465 else
4466 return clobbers_memory;
946e1bc7 4467
a3698dfc
JJ
4468 if (op0
4469 && (DECL_P (op0)
4470 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
1b14d815 4471 {
a3698dfc 4472 ref.ref = op0;
f32682ca 4473 ref.is_read = false;
9771b263 4474 references->safe_push (ref);
1b14d815 4475 }
946e1bc7
ZD
4476 return clobbers_memory;
4477}
4478
4479/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3cb960c7 4480 reference, returns false, otherwise returns true. NEST is the outermost
f8bf9252 4481 loop of the loop nest in which the references should be analyzed. */
946e1bc7 4482
f8bf9252 4483bool
726a989a 4484find_data_references_in_stmt (struct loop *nest, gimple stmt,
9771b263 4485 vec<data_reference_p> *datarefs)
946e1bc7
ZD
4486{
4487 unsigned i;
00f96dc9 4488 auto_vec<data_ref_loc, 2> references;
946e1bc7
ZD
4489 data_ref_loc *ref;
4490 bool ret = true;
4491 data_reference_p dr;
4492
4493 if (get_references_in_stmt (stmt, &references))
ff4c81cc 4494 return false;
946e1bc7 4495
9771b263 4496 FOR_EACH_VEC_ELT (references, i, ref)
946e1bc7 4497 {
5c640e29 4498 dr = create_data_ref (nest, loop_containing_stmt (stmt),
a3698dfc 4499 ref->ref, stmt, ref->is_read);
bbc8a8dc 4500 gcc_assert (dr != NULL);
9771b263 4501 datarefs->safe_push (dr);
946e1bc7 4502 }
9771b263 4503 references.release ();
946e1bc7
ZD
4504 return ret;
4505}
4506
5c640e29
SP
4507/* Stores the data references in STMT to DATAREFS. If there is an
4508 unanalyzable reference, returns false, otherwise returns true.
4509 NEST is the outermost loop of the loop nest in which the references
4510 should be instantiated, LOOP is the loop in which the references
4511 should be analyzed. */
ed91d661
SP
4512
4513bool
5c640e29 4514graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
9771b263 4515 vec<data_reference_p> *datarefs)
ed91d661
SP
4516{
4517 unsigned i;
00f96dc9 4518 auto_vec<data_ref_loc, 2> references;
ed91d661
SP
4519 data_ref_loc *ref;
4520 bool ret = true;
4521 data_reference_p dr;
4522
4523 if (get_references_in_stmt (stmt, &references))
ff4c81cc 4524 return false;
ed91d661 4525
9771b263 4526 FOR_EACH_VEC_ELT (references, i, ref)
ed91d661 4527 {
a3698dfc 4528 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
ed91d661 4529 gcc_assert (dr != NULL);
9771b263 4530 datarefs->safe_push (dr);
ed91d661
SP
4531 }
4532
9771b263 4533 references.release ();
ed91d661
SP
4534 return ret;
4535}
4536
a70d6342
IR
4537/* Search the data references in LOOP, and record the information into
4538 DATAREFS. Returns chrec_dont_know when failing to analyze a
4539 difficult case, returns NULL_TREE otherwise. */
4540
bfe068c3 4541tree
a70d6342 4542find_data_references_in_bb (struct loop *loop, basic_block bb,
9771b263 4543 vec<data_reference_p> *datarefs)
a70d6342
IR
4544{
4545 gimple_stmt_iterator bsi;
4546
4547 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4548 {
4549 gimple stmt = gsi_stmt (bsi);
4550
4551 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4552 {
4553 struct data_reference *res;
4554 res = XCNEW (struct data_reference);
9771b263 4555 datarefs->safe_push (res);
a70d6342
IR
4556
4557 return chrec_dont_know;
4558 }
4559 }
4560
4561 return NULL_TREE;
4562}
4563
56cf8686
SP
4564/* Search the data references in LOOP, and record the information into
4565 DATAREFS. Returns chrec_dont_know when failing to analyze a
4566 difficult case, returns NULL_TREE otherwise.
3cb960c7 4567
464f49d8
DB
4568 TODO: This function should be made smarter so that it can handle address
4569 arithmetic as if they were array accesses, etc. */
56cf8686 4570
fcac74a1 4571tree
ebf78a47 4572find_data_references_in_loop (struct loop *loop,
9771b263 4573 vec<data_reference_p> *datarefs)
56cf8686 4574{
ccbdbf0a
JL
4575 basic_block bb, *bbs;
4576 unsigned int i;
86df10e3 4577
bbc8a8dc 4578 bbs = get_loop_body_in_dom_order (loop);
ccbdbf0a
JL
4579
4580 for (i = 0; i < loop->num_nodes; i++)
56cf8686 4581 {
ccbdbf0a
JL
4582 bb = bbs[i];
4583
a70d6342
IR
4584 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4585 {
4586 free (bbs);
4587 return chrec_dont_know;
4588 }
56cf8686 4589 }
ccbdbf0a
JL
4590 free (bbs);
4591
4aad410d 4592 return NULL_TREE;
56cf8686
SP
4593}
4594
ba42e045
SP
4595/* Recursive helper function. */
4596
4597static bool
9771b263 4598find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
ba42e045
SP
4599{
4600 /* Inner loops of the nest should not contain siblings. Example:
4601 when there are two consecutive loops,
4602
4603 | loop_0
4604 | loop_1
4605 | A[{0, +, 1}_1]
4606 | endloop_1
4607 | loop_2
4608 | A[{0, +, 1}_2]
4609 | endloop_2
4610 | endloop_0
4611
4612 the dependence relation cannot be captured by the distance
4613 abstraction. */
4614 if (loop->next)
4615 return false;
56cf8686 4616
9771b263 4617 loop_nest->safe_push (loop);
ba42e045
SP
4618 if (loop->inner)
4619 return find_loop_nest_1 (loop->inner, loop_nest);
4620 return true;
4621}
4622
4623/* Return false when the LOOP is not well nested. Otherwise return
4624 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4625 contain the loops from the outermost to the innermost, as they will
4626 appear in the classic distance vector. */
4627
5417e022 4628bool
9771b263 4629find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
ba42e045 4630{
9771b263 4631 loop_nest->safe_push (loop);
ba42e045
SP
4632 if (loop->inner)
4633 return find_loop_nest_1 (loop->inner, loop_nest);
4634 return true;
4635}
56cf8686 4636
9f275479
JS
4637/* Returns true when the data dependences have been computed, false otherwise.
4638 Given a loop nest LOOP, the following vectors are returned:
b8698a0f
L
4639 DATAREFS is initialized to all the array elements contained in this loop,
4640 DEPENDENCE_RELATIONS contains the relations between the data references.
4641 Compute read-read and self relations if
86a07404 4642 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
56cf8686 4643
9f275479 4644bool
b8698a0f 4645compute_data_dependences_for_loop (struct loop *loop,
86a07404 4646 bool compute_self_and_read_read_dependences,
9771b263
DN
4647 vec<loop_p> *loop_nest,
4648 vec<data_reference_p> *datarefs,
4649 vec<ddr_p> *dependence_relations)
56cf8686 4650{
9f275479 4651 bool res = true;
86a07404 4652
0ff4040e 4653 memset (&dependence_stats, 0, sizeof (dependence_stats));
56cf8686 4654
b8698a0f 4655 /* If the loop nest is not well formed, or one of the data references
ba42e045
SP
4656 is not computable, give up without spending time to compute other
4657 dependences. */
3cb960c7 4658 if (!loop
01be8516 4659 || !find_loop_nest (loop, loop_nest)
3881dee9
AB
4660 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4661 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4662 compute_self_and_read_read_dependences))
4663 res = false;
0ff4040e
SP
4664
4665 if (dump_file && (dump_flags & TDF_STATS))
56cf8686 4666 {
0ff4040e
SP
4667 fprintf (dump_file, "Dependence tester statistics:\n");
4668
b8698a0f 4669 fprintf (dump_file, "Number of dependence tests: %d\n",
0ff4040e 4670 dependence_stats.num_dependence_tests);
b8698a0f 4671 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
0ff4040e 4672 dependence_stats.num_dependence_dependent);
b8698a0f 4673 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
0ff4040e 4674 dependence_stats.num_dependence_independent);
b8698a0f 4675 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
0ff4040e
SP
4676 dependence_stats.num_dependence_undetermined);
4677
b8698a0f 4678 fprintf (dump_file, "Number of subscript tests: %d\n",
0ff4040e 4679 dependence_stats.num_subscript_tests);
b8698a0f 4680 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
0ff4040e 4681 dependence_stats.num_subscript_undetermined);
b8698a0f 4682 fprintf (dump_file, "Number of same subscript function: %d\n",
0ff4040e
SP
4683 dependence_stats.num_same_subscript_function);
4684
4685 fprintf (dump_file, "Number of ziv tests: %d\n",
4686 dependence_stats.num_ziv);
4687 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4688 dependence_stats.num_ziv_dependent);
4689 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4690 dependence_stats.num_ziv_independent);
4691 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
b8698a0f 4692 dependence_stats.num_ziv_unimplemented);
0ff4040e 4693
b8698a0f 4694 fprintf (dump_file, "Number of siv tests: %d\n",
0ff4040e
SP
4695 dependence_stats.num_siv);
4696 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4697 dependence_stats.num_siv_dependent);
4698 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4699 dependence_stats.num_siv_independent);
4700 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4701 dependence_stats.num_siv_unimplemented);
4702
b8698a0f 4703 fprintf (dump_file, "Number of miv tests: %d\n",
0ff4040e
SP
4704 dependence_stats.num_miv);
4705 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4706 dependence_stats.num_miv_dependent);
4707 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4708 dependence_stats.num_miv_independent);
4709 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4710 dependence_stats.num_miv_unimplemented);
9f275479
JS
4711 }
4712
4713 return res;
56cf8686
SP
4714}
4715
b8698a0f 4716/* Returns true when the data dependences for the basic block BB have been
a70d6342 4717 computed, false otherwise.
b8698a0f 4718 DATAREFS is initialized to all the array elements contained in this basic
a70d6342
IR
4719 block, DEPENDENCE_RELATIONS contains the relations between the data
4720 references. Compute read-read and self relations if
4721 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4722bool
4723compute_data_dependences_for_bb (basic_block bb,
4724 bool compute_self_and_read_read_dependences,
9771b263
DN
4725 vec<data_reference_p> *datarefs,
4726 vec<ddr_p> *dependence_relations)
a70d6342
IR
4727{
4728 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4729 return false;
4730
6e1aa848 4731 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
3881dee9 4732 compute_self_and_read_read_dependences);
a70d6342
IR
4733}
4734
56cf8686 4735/* Entry point (for testing only). Analyze all the data references
3d8864c0 4736 and the dependence relations in LOOP.
56cf8686 4737
b8698a0f
L
4738 The data references are computed first.
4739
56cf8686
SP
4740 A relation on these nodes is represented by a complete graph. Some
4741 of the relations could be of no interest, thus the relations can be
4742 computed on demand.
b8698a0f 4743
56cf8686
SP
4744 In the following function we compute all the relations. This is
4745 just a first implementation that is here for:
b8698a0f 4746 - for showing how to ask for the dependence relations,
56cf8686
SP
4747 - for the debugging the whole dependence graph,
4748 - for the dejagnu testcases and maintenance.
b8698a0f 4749
56cf8686
SP
4750 It is possible to ask only for a part of the graph, avoiding to
4751 compute the whole dependence graph. The computed dependences are
4752 stored in a knowledge base (KB) such that later queries don't
4753 recompute the same information. The implementation of this KB is
4754 transparent to the optimizer, and thus the KB can be changed with a
4755 more efficient implementation, or the KB could be disabled. */
b8698a0f 4756static void
3d8864c0 4757analyze_all_data_dependences (struct loop *loop)
56cf8686
SP
4758{
4759 unsigned int i;
56cf8686 4760 int nb_data_refs = 10;
9771b263
DN
4761 vec<data_reference_p> datarefs;
4762 datarefs.create (nb_data_refs);
4763 vec<ddr_p> dependence_relations;
4764 dependence_relations.create (nb_data_refs * nb_data_refs);
4765 vec<loop_p> loop_nest;
4766 loop_nest.create (3);
56cf8686
SP
4767
4768 /* Compute DDs on the whole function. */
01be8516 4769 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
3d8864c0 4770 &dependence_relations);
56cf8686
SP
4771
4772 if (dump_file)
4773 {
4774 dump_data_dependence_relations (dump_file, dependence_relations);
4775 fprintf (dump_file, "\n\n");
56cf8686 4776
86df10e3
SP
4777 if (dump_flags & TDF_DETAILS)
4778 dump_dist_dir_vectors (dump_file, dependence_relations);
56cf8686 4779
86df10e3 4780 if (dump_flags & TDF_STATS)
56cf8686 4781 {
86df10e3
SP
4782 unsigned nb_top_relations = 0;
4783 unsigned nb_bot_relations = 0;
86df10e3 4784 unsigned nb_chrec_relations = 0;
ebf78a47 4785 struct data_dependence_relation *ddr;
86df10e3 4786
9771b263 4787 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
86df10e3 4788 {
86df10e3
SP
4789 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4790 nb_top_relations++;
b8698a0f 4791
86df10e3 4792 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5006671f 4793 nb_bot_relations++;
b8698a0f
L
4794
4795 else
86df10e3
SP
4796 nb_chrec_relations++;
4797 }
b8698a0f 4798
86df10e3
SP
4799 gather_stats_on_scev_database ();
4800 }
56cf8686 4801 }
36d59cf7 4802
9771b263 4803 loop_nest.release ();
36d59cf7
DB
4804 free_dependence_relations (dependence_relations);
4805 free_data_refs (datarefs);
4806}
3d8864c0
SP
4807
4808/* Computes all the data dependences and check that the results of
4809 several analyzers are the same. */
4810
4811void
4812tree_check_data_deps (void)
4813{
3d8864c0
SP
4814 struct loop *loop_nest;
4815
f0bd40b1 4816 FOR_EACH_LOOP (loop_nest, 0)
3d8864c0
SP
4817 analyze_all_data_dependences (loop_nest);
4818}
36d59cf7
DB
4819
4820/* Free the memory used by a data dependence relation DDR. */
4821
4822void
4823free_dependence_relation (struct data_dependence_relation *ddr)
4824{
4825 if (ddr == NULL)
4826 return;
4827
9771b263 4828 if (DDR_SUBSCRIPTS (ddr).exists ())
d93817c4 4829 free_subscripts (DDR_SUBSCRIPTS (ddr));
9771b263
DN
4830 DDR_DIST_VECTS (ddr).release ();
4831 DDR_DIR_VECTS (ddr).release ();
ebf78a47 4832
36d59cf7
DB
4833 free (ddr);
4834}
4835
4836/* Free the memory used by the data dependence relations from
4837 DEPENDENCE_RELATIONS. */
4838
b8698a0f 4839void
9771b263 4840free_dependence_relations (vec<ddr_p> dependence_relations)
36d59cf7
DB
4841{
4842 unsigned int i;
ebf78a47 4843 struct data_dependence_relation *ddr;
36d59cf7 4844
9771b263 4845 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
01be8516 4846 if (ddr)
3ac57120 4847 free_dependence_relation (ddr);
ebf78a47 4848
9771b263 4849 dependence_relations.release ();
56cf8686
SP
4850}
4851
36d59cf7
DB
4852/* Free the memory used by the data references from DATAREFS. */
4853
4854void
9771b263 4855free_data_refs (vec<data_reference_p> datarefs)
36d59cf7
DB
4856{
4857 unsigned int i;
ebf78a47 4858 struct data_reference *dr;
56cf8686 4859
9771b263 4860 FOR_EACH_VEC_ELT (datarefs, i, dr)
8fdbc9c6 4861 free_data_ref (dr);
9771b263 4862 datarefs.release ();
36d59cf7 4863}