]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-data-ref.c
2011-03-22 Richard Guenther <rguenther@suse.de>
[thirdparty/gcc.git] / gcc / tree-data-ref.c
CommitLineData
2146e26d 1/* Data references and dependences detectors.
7cf0dbf3 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
cfaf579d 3 Free Software Foundation, Inc.
6b421feb 4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
2146e26d 5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
8c4c00c1 10Software Foundation; either version 3, or (at your option) any later
2146e26d 11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
8c4c00c1 19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
2146e26d 21
22/* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
48e1416a 24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
2146e26d 29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
48e1416a 31
2146e26d 32 The goals of this analysis are:
48e1416a 33
2146e26d 34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
48e1416a 37
2146e26d 38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
48e1416a 40
2146e26d 41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
48e1416a 46
47 - to define a knowledge base for storing the data dependence
2146e26d 48 information,
48e1416a 49
2146e26d 50 - to define an interface to access this data.
48e1416a 51
52
2146e26d 53 Definitions:
48e1416a 54
2146e26d 55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
48e1416a 61 solutions are integer constants, for example the equation
2146e26d 62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
48e1416a 64
2146e26d 65 References:
48e1416a 66
2146e26d 67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
48e1416a 69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
2146e26d 72 by Utpal Banerjee.
73
48e1416a 74
2146e26d 75*/
76
77#include "config.h"
78#include "system.h"
79#include "coretypes.h"
ce084dfc 80#include "gimple-pretty-print.h"
2146e26d 81#include "tree-flow.h"
2146e26d 82#include "cfgloop.h"
2146e26d 83#include "tree-data-ref.h"
84#include "tree-scalar-evolution.h"
85#include "tree-pass.h"
faa56cf9 86#include "langhooks.h"
2146e26d 87
6b421feb 88static struct datadep_stats
89{
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
94
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
98
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
103
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
108
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113} dependence_stats;
114
b44d1046 115static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
8c7b1d8e 117 struct data_reference *,
118 struct loop *);
2146e26d 119/* Returns true iff A divides B. */
120
48e1416a 121static inline bool
7ecb5bb2 122tree_fold_divides_p (const_tree a, const_tree b)
2146e26d 123{
426a138f 124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
2146e26d 127}
128
bc3c8ad4 129/* Returns true iff A divides B. */
130
48e1416a 131static inline bool
bc3c8ad4 132int_divides_p (int a, int b)
133{
134 return ((b % a) == 0);
2146e26d 135}
136
137\f
138
48e1416a 139/* Dump into FILE all the data references from DATAREFS. */
2146e26d 140
48e1416a 141void
41c7a324 142dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
2146e26d 143{
144 unsigned int i;
41c7a324 145 struct data_reference *dr;
146
48148244 147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
41c7a324 148 dump_data_reference (file, dr);
2146e26d 149}
150
48e1416a 151/* Dump into STDERR all the data references from DATAREFS. */
5df4cc8d 152
4b987fac 153DEBUG_FUNCTION void
5df4cc8d 154debug_data_references (VEC (data_reference_p, heap) *datarefs)
155{
156 dump_data_references (stderr, datarefs);
157}
158
48e1416a 159/* Dump to STDERR all the dependence relations from DDRS. */
801c5610 160
4b987fac 161DEBUG_FUNCTION void
801c5610 162debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
163{
164 dump_data_dependence_relations (stderr, ddrs);
165}
166
48e1416a 167/* Dump into FILE all the dependence relations from DDRS. */
2146e26d 168
48e1416a 169void
170dump_data_dependence_relations (FILE *file,
41c7a324 171 VEC (ddr_p, heap) *ddrs)
2146e26d 172{
173 unsigned int i;
41c7a324 174 struct data_dependence_relation *ddr;
175
48148244 176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
41c7a324 177 dump_data_dependence_relation (file, ddr);
2146e26d 178}
179
5df4cc8d 180/* Print to STDERR the data_reference DR. */
181
4b987fac 182DEBUG_FUNCTION void
5df4cc8d 183debug_data_reference (struct data_reference *dr)
184{
185 dump_data_reference (stderr, dr);
186}
187
2146e26d 188/* Dump function for a DATA_REFERENCE structure. */
189
48e1416a 190void
191dump_data_reference (FILE *outf,
2146e26d 192 struct data_reference *dr)
193{
194 unsigned int i;
48e1416a 195
b40e3432 196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
75a70cf9 199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
5dc5fe13 200 fprintf (outf, "# ref: ");
2146e26d 201 print_generic_stmt (outf, DR_REF (dr), 0);
5dc5fe13 202 fprintf (outf, "# base_object: ");
516849c7 203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
48e1416a 204
2146e26d 205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
206 {
5dc5fe13 207 fprintf (outf, "# Access function %d: ", i);
2146e26d 208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
209 }
5dc5fe13 210 fprintf (outf, "#)\n");
2146e26d 211}
212
87da4f2e 213/* Dumps the affine function described by FN to the file OUTF. */
214
215static void
216dump_affine_function (FILE *outf, affine_fn fn)
217{
218 unsigned i;
219 tree coef;
220
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
223 {
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
227 }
228}
229
230/* Dumps the conflict function CF to the file OUTF. */
231
232static void
233dump_conflict_function (FILE *outf, conflict_function *cf)
234{
235 unsigned i;
236
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
242 {
243 for (i = 0; i < cf->n; i++)
244 {
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
248 }
249 }
250}
251
bc3c8ad4 252/* Dump function for a SUBSCRIPT structure. */
253
48e1416a 254void
bc3c8ad4 255dump_subscript (FILE *outf, struct subscript *subscript)
256{
87da4f2e 257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
bc3c8ad4 258
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
87da4f2e 261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
bc3c8ad4 263 {
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
267 }
48e1416a 268
87da4f2e 269 cf = SUB_CONFLICTS_IN_B (subscript);
bc3c8ad4 270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
87da4f2e 271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
bc3c8ad4 273 {
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
277 }
278
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
283}
284
6b421feb 285/* Print the classic direction vector DIRV to OUTF. */
286
287void
288print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
291{
292 int eq;
293
294 for (eq = 0; eq < length; eq++)
295 {
bc620c5c 296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
6b421feb 298
299 switch (dir)
300 {
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
325 }
326 }
327 fprintf (outf, "\n");
328}
329
b44d1046 330/* Print a vector of direction vectors. */
331
332void
333print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
335{
336 unsigned j;
337 lambda_vector v;
338
48148244 339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
b44d1046 340 print_direction_vector (outf, v, length);
341}
342
e01f9f1f 343/* Print out a vector VEC of length N to OUTFILE. */
344
345static inline void
346print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
347{
348 int i;
349
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
353}
354
b44d1046 355/* Print a vector of distance vectors. */
356
357void
358print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
360{
361 unsigned j;
362 lambda_vector v;
363
48148244 364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
b44d1046 365 print_lambda_vector (outf, v, length);
366}
367
368/* Debug version. */
369
4b987fac 370DEBUG_FUNCTION void
b44d1046 371debug_data_dependence_relation (struct data_dependence_relation *ddr)
372{
373 dump_data_dependence_relation (stderr, ddr);
374}
375
2146e26d 376/* Dump function for a DATA_DEPENDENCE_RELATION structure. */
377
48e1416a 378void
379dump_data_dependence_relation (FILE *outf,
2146e26d 380 struct data_dependence_relation *ddr)
381{
2146e26d 382 struct data_reference *dra, *drb;
bc3c8ad4 383
bc3c8ad4 384 fprintf (outf, "(Data Dep: \n");
801c5610 385
4cc34f6b 386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
387 {
be2e5c02 388 if (ddr)
389 {
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
400 }
4cc34f6b 401 fprintf (outf, " (don't know)\n)\n");
402 return;
403 }
404
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
801c5610 407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
409
4cc34f6b 410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2146e26d 411 fprintf (outf, " (no dependence)\n");
48e1416a 412
bc3c8ad4 413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2146e26d 414 {
bc3c8ad4 415 unsigned int i;
b44d1046 416 struct loop *loopi;
1532ec98 417
2146e26d 418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
419 {
2146e26d 420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
bc3c8ad4 424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
2146e26d 425 }
1532ec98 426
355572cc 427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
b44d1046 428 fprintf (outf, " loop nest: (");
48148244 429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
b44d1046 430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
432
1532ec98 433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
2146e26d 434 {
1532ec98 435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
b44d1046 437 DDR_NB_LOOPS (ddr));
bc3c8ad4 438 }
1532ec98 439
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
bc3c8ad4 441 {
1532ec98 442 fprintf (outf, " direction_vector: ");
6b421feb 443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
b44d1046 444 DDR_NB_LOOPS (ddr));
2146e26d 445 }
2146e26d 446 }
447
448 fprintf (outf, ")\n");
449}
450
2146e26d 451/* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
452
453void
48e1416a 454dump_data_dependence_direction (FILE *file,
2146e26d 455 enum data_dependence_direction dir)
456{
457 switch (dir)
458 {
48e1416a 459 case dir_positive:
2146e26d 460 fprintf (file, "+");
461 break;
48e1416a 462
2146e26d 463 case dir_negative:
464 fprintf (file, "-");
465 break;
48e1416a 466
2146e26d 467 case dir_equal:
468 fprintf (file, "=");
469 break;
48e1416a 470
2146e26d 471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
48e1416a 474
475 case dir_positive_or_equal:
2146e26d 476 fprintf (file, "+=");
477 break;
48e1416a 478
479 case dir_negative_or_equal:
2146e26d 480 fprintf (file, "-=");
481 break;
48e1416a 482
483 case dir_star:
484 fprintf (file, "*");
2146e26d 485 break;
48e1416a 486
487 default:
2146e26d 488 break;
489 }
490}
491
bc3c8ad4 492/* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
496
48e1416a 497void
41c7a324 498dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
bc3c8ad4 499{
1532ec98 500 unsigned int i, j;
41c7a324 501 struct data_dependence_relation *ddr;
502 lambda_vector v;
bc3c8ad4 503
48148244 504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
41c7a324 505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
506 {
48148244 507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
41c7a324 508 {
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
512 }
513
48148244 514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
41c7a324 515 {
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
519 }
520 }
1532ec98 521
bc3c8ad4 522 fprintf (file, "\n\n");
523}
524
525/* Dumps the data dependence relations DDRS in FILE. */
526
48e1416a 527void
41c7a324 528dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
bc3c8ad4 529{
530 unsigned int i;
41c7a324 531 struct data_dependence_relation *ddr;
532
48148244 533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
41c7a324 534 dump_data_dependence_relation (file, ddr);
bc3c8ad4 535
bc3c8ad4 536 fprintf (file, "\n\n");
537}
538
75a70cf9 539/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
516849c7 544
75a70cf9 545static bool
546split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
516849c7 548{
80bb306a 549 tree var0, var1;
550 tree off0, off1;
75a70cf9 551 enum tree_code ocode = code;
516849c7 552
75a70cf9 553 *var = NULL_TREE;
554 *off = NULL_TREE;
516849c7 555
0de36bdb 556 switch (code)
516849c7 557 {
80bb306a 558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
75a70cf9 560 *off = fold_convert (ssizetype, op0);
561 return true;
516849c7 562
0de36bdb 563 case POINTER_PLUS_EXPR:
75a70cf9 564 ocode = PLUS_EXPR;
0de36bdb 565 /* FALLTHROUGH */
80bb306a 566 case PLUS_EXPR:
567 case MINUS_EXPR:
75a70cf9 568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
516849c7 573
516849c7 574 case MULT_EXPR:
75a70cf9 575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
80bb306a 577
75a70cf9 578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
516849c7 582
80bb306a 583 case ADDR_EXPR:
584 {
75a70cf9 585 tree base, poffset;
80bb306a 586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
516849c7 589
97d819e5 590 op0 = TREE_OPERAND (op0, 0);
75a70cf9 591 if (!handled_component_p (op0))
592 return false;
516849c7 593
75a70cf9 594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
80bb306a 595 &pmode, &punsignedp, &pvolatilep, false);
516849c7 596
80bb306a 597 if (pbitpos % BITS_PER_UNIT != 0)
75a70cf9 598 return false;
80bb306a 599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
516849c7 601
80bb306a 602 if (poffset)
603 {
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
55ed4df6 606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
80bb306a 612 }
613
69a83716 614 var0 = fold_convert (type, base);
615
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
d9659041 621 if (CONVERT_EXPR_P (var0))
69a83716 622 {
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
75a70cf9 631 return false;
69a83716 632
633 *var = var0;
80bb306a 634 *off = off0;
75a70cf9 635 return true;
80bb306a 636 }
516849c7 637
a091367a 638 case SSA_NAME:
639 {
75a70cf9 640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
a091367a 642
75a70cf9 643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
645
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
649
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
a091367a 651 }
be2e5c02 652 CASE_CONVERT:
653 {
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 {
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
667 }
668 return false;
669 }
a091367a 670
516849c7 671 default:
75a70cf9 672 return false;
516849c7 673 }
75a70cf9 674}
675
676/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
678
679void
680split_constant_offset (tree exp, tree *var, tree *off)
681{
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
516849c7 684
75a70cf9 685 *var = exp;
80bb306a 686 *off = ssize_int (0);
75a70cf9 687 STRIP_NOPS (exp);
688
c86930b0 689 if (tree_is_chrec (exp))
75a70cf9 690 return;
691
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 {
697 *var = fold_convert (type, e);
698 *off = o;
699 }
516849c7 700}
701
80bb306a 702/* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
516849c7 704
705static tree
80bb306a 706canonicalize_base_object_address (tree addr)
516849c7 707{
ad4a85ad 708 tree orig = addr;
709
80bb306a 710 STRIP_NOPS (addr);
516849c7 711
ad4a85ad 712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
716
80bb306a 717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
516849c7 719
80bb306a 720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
516849c7 721}
722
48e1416a 723/* Analyzes the behavior of the memory reference DR in the innermost loop or
37545e54 724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
516849c7 726
880734c8 727bool
80bb306a 728dr_analyze_innermost (struct data_reference *dr)
516849c7 729{
75a70cf9 730 gimple stmt = DR_STMT (dr);
80bb306a 731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
516849c7 733 HOST_WIDE_INT pbitsize, pbitpos;
80bb306a 734 tree base, poffset;
516849c7 735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
80bb306a 737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
37545e54 739 bool in_loop = (loop && loop->num);
80bb306a 740
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
516849c7 743
80bb306a 744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
516849c7 747
80bb306a 748 if (pbitpos % BITS_PER_UNIT != 0)
516849c7 749 {
80bb306a 750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
880734c8 752 return false;
80bb306a 753 }
516849c7 754
182cf5a9 755 if (TREE_CODE (base) == MEM_REF)
756 {
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 {
759 if (!poffset)
760 {
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
763 }
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 }
767 base = TREE_OPERAND (base, 0);
768 }
769 else
770 base = build_fold_addr_expr (base);
37545e54 771 if (in_loop)
80bb306a 772 {
48e1416a 773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
37545e54 774 false))
775 {
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
779 }
780 }
781 else
782 {
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
80bb306a 786 }
37545e54 787
dd2644cf 788 if (!poffset)
80bb306a 789 {
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
792 }
dd2644cf 793 else
80bb306a 794 {
dd2644cf 795 if (!in_loop)
796 {
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
799 }
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
802 {
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
807 }
80bb306a 808 }
516849c7 809
80bb306a 810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
516849c7 815
80bb306a 816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
516849c7 819
80bb306a 820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
516849c7 821
80bb306a 822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
516849c7 825
80bb306a 826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
516849c7 827
80bb306a 828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
880734c8 830
831 return true;
80bb306a 832}
516849c7 833
80bb306a 834/* Determines the base object and the list of indices of memory reference
221a697e 835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
516849c7 836
80bb306a 837static void
221a697e 838dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
80bb306a 839{
80bb306a 840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
37545e54 842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
48e1416a 844
37545e54 845 if (nest)
846 before_loop = block_before_loop (nest);
48e1416a 847
80bb306a 848 while (handled_component_p (aref))
516849c7 849 {
80bb306a 850 if (TREE_CODE (aref) == ARRAY_REF)
516849c7 851 {
80bb306a 852 op = TREE_OPERAND (aref, 1);
37545e54 853 if (nest)
854 {
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
858 }
80bb306a 859
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
516849c7 861 }
48e1416a 862
80bb306a 863 aref = TREE_OPERAND (aref, 0);
516849c7 864 }
865
182cf5a9 866 if (nest
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
516849c7 869 {
80bb306a 870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
089aa668 872 access_fn = instantiate_scev (before_loop, loop, access_fn);
80bb306a 873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
182cf5a9 875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
80bb306a 878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
880
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
516849c7 883 }
516849c7 884
182cf5a9 885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
900
80bb306a 901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
516849c7 903}
904
80bb306a 905/* Extracts the alias analysis information from the memory reference DR. */
516849c7 906
80bb306a 907static void
908dr_analyze_alias (struct data_reference *dr)
516849c7 909{
80bb306a 910 tree ref = DR_REF (dr);
dd277d48 911 tree base = get_base_address (ref), addr;
912
182cf5a9 913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
80bb306a 915 {
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
dd277d48 918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
80bb306a 919 }
80bb306a 920}
516849c7 921
80bb306a 922/* Returns true if the address of DR is invariant. */
516849c7 923
80bb306a 924static bool
925dr_address_invariant_p (struct data_reference *dr)
926{
927 unsigned i;
928 tree idx;
929
48148244 930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
80bb306a 931 if (tree_contains_chrecs (idx, NULL))
932 return false;
72443169 933
72443169 934 return true;
516849c7 935}
936
80bb306a 937/* Frees data reference DR. */
31e389a2 938
801c5610 939void
31e389a2 940free_data_ref (data_reference_p dr)
941{
80bb306a 942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
31e389a2 943 free (dr);
944}
516849c7 945
80bb306a 946/* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
221a697e 948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
516849c7 951
5c205353 952struct data_reference *
221a697e 953create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
954 bool is_read)
516849c7 955{
80bb306a 956 struct data_reference *dr;
6b421feb 957
80bb306a 958 if (dump_file && (dump_flags & TDF_DETAILS))
6b421feb 959 {
80bb306a 960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
6b421feb 963 }
f84a688a 964
80bb306a 965 dr = XCNEW (struct data_reference);
966 DR_STMT (dr) = stmt;
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
516849c7 969
80bb306a 970 dr_analyze_innermost (dr);
221a697e 971 dr_analyze_indices (dr, nest, loop);
80bb306a 972 dr_analyze_alias (dr);
516849c7 973
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 {
80bb306a 976 fprintf (dump_file, "\tbase_address: ");
516849c7 977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
516849c7 982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
80bb306a 984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
516849c7 988 fprintf (dump_file, "\n");
80bb306a 989 }
990
48e1416a 991 return dr;
516849c7 992}
993
87da4f2e 994/* Returns true if FNA == FNB. */
995
996static bool
997affine_function_equal_p (affine_fn fna, affine_fn fnb)
998{
999 unsigned i, n = VEC_length (tree, fna);
516849c7 1000
de33470d 1001 if (n != VEC_length (tree, fnb))
1002 return false;
bc3c8ad4 1003
87da4f2e 1004 for (i = 0; i < n; i++)
1005 if (!operand_equal_p (VEC_index (tree, fna, i),
1006 VEC_index (tree, fnb, i), 0))
1007 return false;
1008
1009 return true;
1010}
1011
1012/* If all the functions in CF are the same, returns one of them,
1013 otherwise returns NULL. */
1014
1015static affine_fn
1016common_affine_function (conflict_function *cf)
bc3c8ad4 1017{
87da4f2e 1018 unsigned i;
1019 affine_fn comm;
1020
1021 if (!CF_NONTRIVIAL_P (cf))
1022 return NULL;
1023
1024 comm = cf->fns[0];
1025
1026 for (i = 1; i < cf->n; i++)
1027 if (!affine_function_equal_p (comm, cf->fns[i]))
1028 return NULL;
1029
1030 return comm;
1031}
bc3c8ad4 1032
87da4f2e 1033/* Returns the base of the affine function FN. */
1034
1035static tree
1036affine_function_base (affine_fn fn)
1037{
1038 return VEC_index (tree, fn, 0);
1039}
1040
1041/* Returns true if FN is a constant. */
1042
1043static bool
1044affine_function_constant_p (affine_fn fn)
1045{
1046 unsigned i;
1047 tree coef;
1048
1049 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1050 if (!integer_zerop (coef))
f84a688a 1051 return false;
1052
bc3c8ad4 1053 return true;
1054}
1055
2a21a5df 1056/* Returns true if FN is the zero constant function. */
1057
1058static bool
1059affine_function_zero_p (affine_fn fn)
1060{
1061 return (integer_zerop (affine_function_base (fn))
1062 && affine_function_constant_p (fn));
1063}
1064
9a3dad1c 1065/* Returns a signed integer type with the largest precision from TA
1066 and TB. */
1067
1068static tree
1069signed_type_for_types (tree ta, tree tb)
1070{
1071 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1072 return signed_type_for (ta);
1073 else
1074 return signed_type_for (tb);
1075}
1076
87da4f2e 1077/* Applies operation OP on affine functions FNA and FNB, and returns the
1078 result. */
1079
1080static affine_fn
1081affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1082{
1083 unsigned i, n, m;
1084 affine_fn ret;
1085 tree coef;
1086
1087 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1088 {
1089 n = VEC_length (tree, fna);
1090 m = VEC_length (tree, fnb);
1091 }
1092 else
1093 {
1094 n = VEC_length (tree, fnb);
1095 m = VEC_length (tree, fna);
1096 }
1097
1098 ret = VEC_alloc (tree, heap, m);
1099 for (i = 0; i < n; i++)
9a3dad1c 1100 {
1101 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1102 TREE_TYPE (VEC_index (tree, fnb, i)));
1103
1104 VEC_quick_push (tree, ret,
1105 fold_build2 (op, type,
48e1416a 1106 VEC_index (tree, fna, i),
9a3dad1c 1107 VEC_index (tree, fnb, i)));
1108 }
87da4f2e 1109
1110 for (; VEC_iterate (tree, fna, i, coef); i++)
1111 VEC_quick_push (tree, ret,
9a3dad1c 1112 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
87da4f2e 1113 coef, integer_zero_node));
1114 for (; VEC_iterate (tree, fnb, i, coef); i++)
1115 VEC_quick_push (tree, ret,
9a3dad1c 1116 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
87da4f2e 1117 integer_zero_node, coef));
1118
1119 return ret;
1120}
1121
1122/* Returns the sum of affine functions FNA and FNB. */
1123
1124static affine_fn
1125affine_fn_plus (affine_fn fna, affine_fn fnb)
1126{
1127 return affine_fn_op (PLUS_EXPR, fna, fnb);
1128}
1129
1130/* Returns the difference of affine functions FNA and FNB. */
1131
1132static affine_fn
1133affine_fn_minus (affine_fn fna, affine_fn fnb)
1134{
1135 return affine_fn_op (MINUS_EXPR, fna, fnb);
1136}
1137
1138/* Frees affine function FN. */
1139
1140static void
1141affine_fn_free (affine_fn fn)
1142{
1143 VEC_free (tree, heap, fn);
1144}
1145
bc3c8ad4 1146/* Determine for each subscript in the data dependence relation DDR
1147 the distance. */
2146e26d 1148
6b421feb 1149static void
bc3c8ad4 1150compute_subscript_distance (struct data_dependence_relation *ddr)
2146e26d 1151{
87da4f2e 1152 conflict_function *cf_a, *cf_b;
1153 affine_fn fn_a, fn_b, diff;
1154
2146e26d 1155 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1156 {
1157 unsigned int i;
48e1416a 1158
2146e26d 1159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1160 {
2146e26d 1161 struct subscript *subscript;
48e1416a 1162
2146e26d 1163 subscript = DDR_SUBSCRIPT (ddr, i);
87da4f2e 1164 cf_a = SUB_CONFLICTS_IN_A (subscript);
1165 cf_b = SUB_CONFLICTS_IN_B (subscript);
bc3c8ad4 1166
87da4f2e 1167 fn_a = common_affine_function (cf_a);
1168 fn_b = common_affine_function (cf_b);
1169 if (!fn_a || !fn_b)
bc3c8ad4 1170 {
87da4f2e 1171 SUB_DISTANCE (subscript) = chrec_dont_know;
1172 return;
bc3c8ad4 1173 }
87da4f2e 1174 diff = affine_fn_minus (fn_a, fn_b);
48e1416a 1175
87da4f2e 1176 if (affine_function_constant_p (diff))
1177 SUB_DISTANCE (subscript) = affine_function_base (diff);
2146e26d 1178 else
1179 SUB_DISTANCE (subscript) = chrec_dont_know;
87da4f2e 1180
1181 affine_fn_free (diff);
2146e26d 1182 }
1183 }
1184}
1185
87da4f2e 1186/* Returns the conflict function for "unknown". */
1187
1188static conflict_function *
1189conflict_fn_not_known (void)
1190{
1191 conflict_function *fn = XCNEW (conflict_function);
1192 fn->n = NOT_KNOWN;
1193
1194 return fn;
1195}
1196
1197/* Returns the conflict function for "independent". */
1198
1199static conflict_function *
1200conflict_fn_no_dependence (void)
1201{
1202 conflict_function *fn = XCNEW (conflict_function);
1203 fn->n = NO_DEPENDENCE;
1204
1205 return fn;
1206}
1207
80bb306a 1208/* Returns true if the address of OBJ is invariant in LOOP. */
1209
1210static bool
7ecb5bb2 1211object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
80bb306a 1212{
1213 while (handled_component_p (obj))
1214 {
1215 if (TREE_CODE (obj) == ARRAY_REF)
1216 {
1217 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1218 need to check the stride and the lower bound of the reference. */
1219 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1220 loop->num)
1221 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1222 loop->num))
1223 return false;
1224 }
1225 else if (TREE_CODE (obj) == COMPONENT_REF)
1226 {
1227 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1228 loop->num))
1229 return false;
1230 }
1231 obj = TREE_OPERAND (obj, 0);
1232 }
1233
182cf5a9 1234 if (!INDIRECT_REF_P (obj)
1235 && TREE_CODE (obj) != MEM_REF)
80bb306a 1236 return true;
1237
1238 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1239 loop->num);
1240}
1241
80bb306a 1242/* Returns false if we can prove that data references A and B do not alias,
1243 true otherwise. */
1244
255b6be7 1245bool
7ecb5bb2 1246dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
80bb306a 1247{
2760c0e3 1248 tree addr_a = DR_BASE_OBJECT (a);
1249 tree addr_b = DR_BASE_OBJECT (b);
80bb306a 1250
9ff25603 1251 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2760c0e3 1252 return refs_output_dependent_p (addr_a, addr_b);
9ff25603 1253 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2760c0e3 1254 return refs_anti_dependent_p (addr_a, addr_b);
1255 return refs_may_alias_p (addr_a, addr_b);
80bb306a 1256}
1257
c127dd86 1258static void compute_self_dependence (struct data_dependence_relation *);
1259
6b421feb 1260/* Initialize a data dependence relation between data accesses A and
1261 B. NB_LOOPS is the number of loops surrounding the references: the
1262 size of the classic distance/direction vectors. */
2146e26d 1263
6b421feb 1264static struct data_dependence_relation *
48e1416a 1265initialize_data_dependence_relation (struct data_reference *a,
6b421feb 1266 struct data_reference *b,
b44d1046 1267 VEC (loop_p, heap) *loop_nest)
2146e26d 1268{
1269 struct data_dependence_relation *res;
6b421feb 1270 unsigned int i;
48e1416a 1271
4c36ffe6 1272 res = XNEW (struct data_dependence_relation);
2146e26d 1273 DDR_A (res) = a;
1274 DDR_B (res) = b;
62761183 1275 DDR_LOOP_NEST (res) = NULL;
0ecb94cf 1276 DDR_REVERSED_P (res) = false;
d534eb36 1277 DDR_SUBSCRIPTS (res) = NULL;
1278 DDR_DIR_VECTS (res) = NULL;
1279 DDR_DIST_VECTS (res) = NULL;
2146e26d 1280
516849c7 1281 if (a == NULL || b == NULL)
1282 {
48e1416a 1283 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
516849c7 1284 return res;
48e1416a 1285 }
516849c7 1286
80bb306a 1287 /* If the data references do not alias, then they are independent. */
1288 if (!dr_may_alias_p (a, b))
516849c7 1289 {
48e1416a 1290 DDR_ARE_DEPENDENT (res) = chrec_known;
516849c7 1291 return res;
1292 }
2146e26d 1293
c127dd86 1294 /* When the references are exactly the same, don't spend time doing
1295 the data dependence tests, just initialize the ddr and return. */
1296 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1297 {
1298 DDR_AFFINE_P (res) = true;
1299 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1300 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1301 DDR_LOOP_NEST (res) = loop_nest;
1302 DDR_INNER_LOOP (res) = 0;
1303 DDR_SELF_REFERENCE (res) = true;
1304 compute_self_dependence (res);
1305 return res;
1306 }
1307
80bb306a 1308 /* If the references do not access the same object, we do not know
1309 whether they alias or not. */
1310 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
2146e26d 1311 {
48e1416a 1312 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
516849c7 1313 return res;
1314 }
6b421feb 1315
80bb306a 1316 /* If the base of the object is not invariant in the loop nest, we cannot
bef304b8 1317 analyze it. TODO -- in fact, it would suffice to record that there may
310d2511 1318 be arbitrary dependences in the loops where the base object varies. */
48e1416a 1319 if (loop_nest
37545e54 1320 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1321 DR_BASE_OBJECT (a)))
516849c7 1322 {
48e1416a 1323 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
516849c7 1324 return res;
1325 }
80bb306a 1326
fabd4538 1327 /* If the number of dimensions of the access to not agree we can have
1328 a pointer access to a component of the array element type and an
1329 array access while the base-objects are still the same. Punt. */
1330 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1331 {
1332 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1333 return res;
1334 }
80bb306a 1335
516849c7 1336 DDR_AFFINE_P (res) = true;
1337 DDR_ARE_DEPENDENT (res) = NULL_TREE;
41c7a324 1338 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
b44d1046 1339 DDR_LOOP_NEST (res) = loop_nest;
355572cc 1340 DDR_INNER_LOOP (res) = 0;
c127dd86 1341 DDR_SELF_REFERENCE (res) = false;
1532ec98 1342
516849c7 1343 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1344 {
1345 struct subscript *subscript;
48e1416a 1346
4c36ffe6 1347 subscript = XNEW (struct subscript);
87da4f2e 1348 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1349 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
516849c7 1350 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1351 SUB_DISTANCE (subscript) = chrec_dont_know;
41c7a324 1352 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
2146e26d 1353 }
41c7a324 1354
2146e26d 1355 return res;
1356}
1357
87da4f2e 1358/* Frees memory used by the conflict function F. */
1359
1360static void
1361free_conflict_function (conflict_function *f)
1362{
1363 unsigned i;
1364
1365 if (CF_NONTRIVIAL_P (f))
1366 {
1367 for (i = 0; i < f->n; i++)
1368 affine_fn_free (f->fns[i]);
1369 }
1370 free (f);
1371}
1372
1373/* Frees memory used by SUBSCRIPTS. */
1374
1375static void
1376free_subscripts (VEC (subscript_p, heap) *subscripts)
1377{
1378 unsigned i;
1379 subscript_p s;
1380
48148244 1381 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
87da4f2e 1382 {
1383 free_conflict_function (s->conflicting_iterations_in_a);
1384 free_conflict_function (s->conflicting_iterations_in_b);
19af51e2 1385 free (s);
87da4f2e 1386 }
1387 VEC_free (subscript_p, heap, subscripts);
1388}
1389
2146e26d 1390/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1391 description. */
1392
1393static inline void
48e1416a 1394finalize_ddr_dependent (struct data_dependence_relation *ddr,
2146e26d 1395 tree chrec)
1396{
6b6f234c 1397 if (dump_file && (dump_flags & TDF_DETAILS))
1398 {
1399 fprintf (dump_file, "(dependence classified: ");
1400 print_generic_expr (dump_file, chrec, 0);
1401 fprintf (dump_file, ")\n");
1402 }
1403
48e1416a 1404 DDR_ARE_DEPENDENT (ddr) = chrec;
87da4f2e 1405 free_subscripts (DDR_SUBSCRIPTS (ddr));
d534eb36 1406 DDR_SUBSCRIPTS (ddr) = NULL;
2146e26d 1407}
1408
bc3c8ad4 1409/* The dependence relation DDR cannot be represented by a distance
1410 vector. */
1411
1412static inline void
1413non_affine_dependence_relation (struct data_dependence_relation *ddr)
1414{
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1417
1418 DDR_AFFINE_P (ddr) = false;
1419}
1420
2146e26d 1421\f
1422
1423/* This section contains the classic Banerjee tests. */
1424
1425/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1426 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1427
1428static inline bool
7ecb5bb2 1429ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2146e26d 1430{
1431 return (evolution_function_is_constant_p (chrec_a)
1432 && evolution_function_is_constant_p (chrec_b));
1433}
1434
1435/* Returns true iff CHREC_A and CHREC_B are dependent on an index
1436 variable, i.e., if the SIV (Single Index Variable) test is true. */
1437
1438static bool
7ecb5bb2 1439siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2146e26d 1440{
1441 if ((evolution_function_is_constant_p (chrec_a)
1442 && evolution_function_is_univariate_p (chrec_b))
1443 || (evolution_function_is_constant_p (chrec_b)
1444 && evolution_function_is_univariate_p (chrec_a)))
1445 return true;
48e1416a 1446
2146e26d 1447 if (evolution_function_is_univariate_p (chrec_a)
1448 && evolution_function_is_univariate_p (chrec_b))
1449 {
1450 switch (TREE_CODE (chrec_a))
1451 {
1452 case POLYNOMIAL_CHREC:
1453 switch (TREE_CODE (chrec_b))
1454 {
1455 case POLYNOMIAL_CHREC:
1456 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1457 return false;
48e1416a 1458
2146e26d 1459 default:
1460 return true;
1461 }
48e1416a 1462
2146e26d 1463 default:
1464 return true;
1465 }
1466 }
48e1416a 1467
2146e26d 1468 return false;
1469}
1470
87da4f2e 1471/* Creates a conflict function with N dimensions. The affine functions
1472 in each dimension follow. */
1473
1474static conflict_function *
1475conflict_fn (unsigned n, ...)
1476{
1477 unsigned i;
1478 conflict_function *ret = XCNEW (conflict_function);
1479 va_list ap;
1480
a86c75a6 1481 gcc_assert (0 < n && n <= MAX_DIM);
87da4f2e 1482 va_start(ap, n);
48e1416a 1483
87da4f2e 1484 ret->n = n;
1485 for (i = 0; i < n; i++)
1486 ret->fns[i] = va_arg (ap, affine_fn);
1487 va_end(ap);
1488
1489 return ret;
1490}
1491
1492/* Returns constant affine function with value CST. */
1493
1494static affine_fn
1495affine_fn_cst (tree cst)
1496{
1497 affine_fn fn = VEC_alloc (tree, heap, 1);
1498 VEC_quick_push (tree, fn, cst);
1499 return fn;
1500}
1501
1502/* Returns affine function with single variable, CST + COEF * x_DIM. */
1503
1504static affine_fn
1505affine_fn_univar (tree cst, unsigned dim, tree coef)
1506{
1507 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1508 unsigned i;
1509
1510 gcc_assert (dim > 0);
1511 VEC_quick_push (tree, fn, cst);
1512 for (i = 1; i < dim; i++)
1513 VEC_quick_push (tree, fn, integer_zero_node);
1514 VEC_quick_push (tree, fn, coef);
1515 return fn;
1516}
1517
2146e26d 1518/* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1519 *OVERLAPS_B are initialized to the functions that describe the
1520 relation between the elements accessed twice by CHREC_A and
1521 CHREC_B. For k >= 0, the following property is verified:
1522
1523 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1524
48e1416a 1525static void
1526analyze_ziv_subscript (tree chrec_a,
1527 tree chrec_b,
87da4f2e 1528 conflict_function **overlaps_a,
48e1416a 1529 conflict_function **overlaps_b,
bc3c8ad4 1530 tree *last_conflicts)
2146e26d 1531{
9a3dad1c 1532 tree type, difference;
6b421feb 1533 dependence_stats.num_ziv++;
48e1416a 1534
2146e26d 1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "(analyze_ziv_subscript \n");
9a3dad1c 1537
1538 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
75a70cf9 1539 chrec_a = chrec_convert (type, chrec_a, NULL);
1540 chrec_b = chrec_convert (type, chrec_b, NULL);
9a3dad1c 1541 difference = chrec_fold_minus (type, chrec_a, chrec_b);
48e1416a 1542
2146e26d 1543 switch (TREE_CODE (difference))
1544 {
1545 case INTEGER_CST:
1546 if (integer_zerop (difference))
1547 {
1548 /* The difference is equal to zero: the accessed index
1549 overlaps for each iteration in the loop. */
87da4f2e 1550 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1551 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
bc3c8ad4 1552 *last_conflicts = chrec_dont_know;
6b421feb 1553 dependence_stats.num_ziv_dependent++;
2146e26d 1554 }
1555 else
1556 {
1557 /* The accesses do not overlap. */
87da4f2e 1558 *overlaps_a = conflict_fn_no_dependence ();
1559 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 1560 *last_conflicts = integer_zero_node;
6b421feb 1561 dependence_stats.num_ziv_independent++;
2146e26d 1562 }
1563 break;
48e1416a 1564
2146e26d 1565 default:
48e1416a 1566 /* We're not sure whether the indexes overlap. For the moment,
2146e26d 1567 conservatively answer "don't know". */
6b421feb 1568 if (dump_file && (dump_flags & TDF_DETAILS))
1569 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1570
87da4f2e 1571 *overlaps_a = conflict_fn_not_known ();
1572 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 1573 *last_conflicts = chrec_dont_know;
6b421feb 1574 dependence_stats.num_ziv_unimplemented++;
2146e26d 1575 break;
1576 }
48e1416a 1577
2146e26d 1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, ")\n");
1580}
1581
d500fef3 1582/* Sets NIT to the estimated number of executions of the statements in
1583 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1584 large as the number of iterations. If we have no reliable estimate,
1585 the function returns false, otherwise returns true. */
b6d41046 1586
d97e22fb 1587bool
d500fef3 1588estimated_loop_iterations (struct loop *loop, bool conservative,
1589 double_int *nit)
b6d41046 1590{
9b53b827 1591 estimate_numbers_of_iterations_loop (loop, true);
4b4ab846 1592 if (conservative)
d500fef3 1593 {
4b4ab846 1594 if (!loop->any_upper_bound)
1595 return false;
d500fef3 1596
4b4ab846 1597 *nit = loop->nb_iterations_upper_bound;
d500fef3 1598 }
4b4ab846 1599 else
faa56cf9 1600 {
4b4ab846 1601 if (!loop->any_estimate)
1602 return false;
1603
1604 *nit = loop->nb_iterations_estimate;
faa56cf9 1605 }
1606
4b4ab846 1607 return true;
d500fef3 1608}
1609
1610/* Similar to estimated_loop_iterations, but returns the estimate only
1611 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1612 on the number of iterations of LOOP could not be derived, returns -1. */
1613
1614HOST_WIDE_INT
1615estimated_loop_iterations_int (struct loop *loop, bool conservative)
1616{
1617 double_int nit;
1618 HOST_WIDE_INT hwi_nit;
1619
1620 if (!estimated_loop_iterations (loop, conservative, &nit))
1621 return -1;
1622
1623 if (!double_int_fits_in_shwi_p (nit))
1624 return -1;
1625 hwi_nit = double_int_to_shwi (nit);
1626
1627 return hwi_nit < 0 ? -1 : hwi_nit;
b6d41046 1628}
48e1416a 1629
d500fef3 1630/* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1631 and only if it fits to the int type. If this is not the case, or the
1632 estimate on the number of iterations of LOOP could not be derived, returns
1633 chrec_dont_know. */
1634
1635static tree
1636estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1637{
1638 double_int nit;
1639 tree type;
1640
1641 if (!estimated_loop_iterations (loop, conservative, &nit))
1642 return chrec_dont_know;
1643
1644 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1645 if (!double_int_fits_to_tree_p (type, nit))
1646 return chrec_dont_know;
1647
1648 return double_int_to_tree (type, nit);
1649}
1650
2146e26d 1651/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1652 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1653 *OVERLAPS_B are initialized to the functions that describe the
1654 relation between the elements accessed twice by CHREC_A and
1655 CHREC_B. For k >= 0, the following property is verified:
1656
1657 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1658
1659static void
48e1416a 1660analyze_siv_subscript_cst_affine (tree chrec_a,
2146e26d 1661 tree chrec_b,
48e1416a 1662 conflict_function **overlaps_a,
1663 conflict_function **overlaps_b,
bc3c8ad4 1664 tree *last_conflicts)
2146e26d 1665{
1666 bool value0, value1, value2;
9a3dad1c 1667 tree type, difference, tmp;
f84a688a 1668
9a3dad1c 1669 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
75a70cf9 1670 chrec_a = chrec_convert (type, chrec_a, NULL);
1671 chrec_b = chrec_convert (type, chrec_b, NULL);
9a3dad1c 1672 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
48e1416a 1673
2146e26d 1674 if (!chrec_is_positive (initial_condition (difference), &value0))
1675 {
6b421feb 1676 if (dump_file && (dump_flags & TDF_DETAILS))
48e1416a 1677 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
6b421feb 1678
1679 dependence_stats.num_siv_unimplemented++;
87da4f2e 1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 1682 *last_conflicts = chrec_dont_know;
2146e26d 1683 return;
1684 }
1685 else
1686 {
1687 if (value0 == false)
1688 {
1689 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1690 {
6b421feb 1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1693
87da4f2e 1694 *overlaps_a = conflict_fn_not_known ();
48e1416a 1695 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 1696 *last_conflicts = chrec_dont_know;
6b421feb 1697 dependence_stats.num_siv_unimplemented++;
2146e26d 1698 return;
1699 }
1700 else
1701 {
1702 if (value1 == true)
1703 {
48e1416a 1704 /* Example:
2146e26d 1705 chrec_a = 12
1706 chrec_b = {10, +, 1}
1707 */
48e1416a 1708
e7be49a3 1709 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2146e26d 1710 {
d500fef3 1711 HOST_WIDE_INT numiter;
1712 struct loop *loop = get_chrec_loop (chrec_b);
b6d41046 1713
87da4f2e 1714 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
9a3dad1c 1715 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1716 fold_build1 (ABS_EXPR, type, difference),
87da4f2e 1717 CHREC_RIGHT (chrec_b));
1718 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
bc3c8ad4 1719 *last_conflicts = integer_one_node;
48e1416a 1720
b6d41046 1721
1722 /* Perform weak-zero siv test to see if overlap is
1723 outside the loop bounds. */
4ee81b10 1724 numiter = estimated_loop_iterations_int (loop, false);
b6d41046 1725
d500fef3 1726 if (numiter >= 0
1727 && compare_tree_int (tmp, numiter) > 0)
b6d41046 1728 {
87da4f2e 1729 free_conflict_function (*overlaps_a);
1730 free_conflict_function (*overlaps_b);
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
b6d41046 1733 *last_conflicts = integer_zero_node;
6b421feb 1734 dependence_stats.num_siv_independent++;
b6d41046 1735 return;
48e1416a 1736 }
6b421feb 1737 dependence_stats.num_siv_dependent++;
2146e26d 1738 return;
1739 }
48e1416a 1740
e7be49a3 1741 /* When the step does not divide the difference, there are
2146e26d 1742 no overlaps. */
1743 else
1744 {
87da4f2e 1745 *overlaps_a = conflict_fn_no_dependence ();
48e1416a 1746 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 1747 *last_conflicts = integer_zero_node;
6b421feb 1748 dependence_stats.num_siv_independent++;
2146e26d 1749 return;
1750 }
1751 }
48e1416a 1752
2146e26d 1753 else
1754 {
48e1416a 1755 /* Example:
2146e26d 1756 chrec_a = 12
1757 chrec_b = {10, +, -1}
48e1416a 1758
2146e26d 1759 In this case, chrec_a will not overlap with chrec_b. */
87da4f2e 1760 *overlaps_a = conflict_fn_no_dependence ();
1761 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 1762 *last_conflicts = integer_zero_node;
6b421feb 1763 dependence_stats.num_siv_independent++;
2146e26d 1764 return;
1765 }
1766 }
1767 }
48e1416a 1768 else
2146e26d 1769 {
1770 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1771 {
6b421feb 1772 if (dump_file && (dump_flags & TDF_DETAILS))
1773 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1774
87da4f2e 1775 *overlaps_a = conflict_fn_not_known ();
48e1416a 1776 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 1777 *last_conflicts = chrec_dont_know;
6b421feb 1778 dependence_stats.num_siv_unimplemented++;
2146e26d 1779 return;
1780 }
1781 else
1782 {
1783 if (value2 == false)
1784 {
48e1416a 1785 /* Example:
2146e26d 1786 chrec_a = 3
1787 chrec_b = {10, +, -1}
1788 */
e7be49a3 1789 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2146e26d 1790 {
d500fef3 1791 HOST_WIDE_INT numiter;
1792 struct loop *loop = get_chrec_loop (chrec_b);
b6d41046 1793
87da4f2e 1794 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
9a3dad1c 1795 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
87da4f2e 1796 CHREC_RIGHT (chrec_b));
1797 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
bc3c8ad4 1798 *last_conflicts = integer_one_node;
b6d41046 1799
1800 /* Perform weak-zero siv test to see if overlap is
1801 outside the loop bounds. */
4ee81b10 1802 numiter = estimated_loop_iterations_int (loop, false);
b6d41046 1803
d500fef3 1804 if (numiter >= 0
1805 && compare_tree_int (tmp, numiter) > 0)
b6d41046 1806 {
87da4f2e 1807 free_conflict_function (*overlaps_a);
1808 free_conflict_function (*overlaps_b);
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
b6d41046 1811 *last_conflicts = integer_zero_node;
6b421feb 1812 dependence_stats.num_siv_independent++;
b6d41046 1813 return;
48e1416a 1814 }
6b421feb 1815 dependence_stats.num_siv_dependent++;
2146e26d 1816 return;
1817 }
48e1416a 1818
c4243fe7 1819 /* When the step does not divide the difference, there
2146e26d 1820 are no overlaps. */
1821 else
1822 {
87da4f2e 1823 *overlaps_a = conflict_fn_no_dependence ();
48e1416a 1824 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 1825 *last_conflicts = integer_zero_node;
6b421feb 1826 dependence_stats.num_siv_independent++;
2146e26d 1827 return;
1828 }
1829 }
1830 else
1831 {
48e1416a 1832 /* Example:
1833 chrec_a = 3
2146e26d 1834 chrec_b = {4, +, 1}
48e1416a 1835
2146e26d 1836 In this case, chrec_a will not overlap with chrec_b. */
87da4f2e 1837 *overlaps_a = conflict_fn_no_dependence ();
1838 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 1839 *last_conflicts = integer_zero_node;
6b421feb 1840 dependence_stats.num_siv_independent++;
2146e26d 1841 return;
1842 }
1843 }
1844 }
1845 }
1846}
1847
fbd828d4 1848/* Helper recursive function for initializing the matrix A. Returns
bc3c8ad4 1849 the initial value of CHREC. */
2146e26d 1850
8ceddf57 1851static tree
bc3c8ad4 1852initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1853{
1854 gcc_assert (chrec);
1855
8ceddf57 1856 switch (TREE_CODE (chrec))
1857 {
1858 case POLYNOMIAL_CHREC:
1859 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1860
1861 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1862 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1863
1864 case PLUS_EXPR:
1865 case MULT_EXPR:
1866 case MINUS_EXPR:
1867 {
1868 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1869 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1870
1871 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1872 }
1873
1874 case NOP_EXPR:
1875 {
1876 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
75a70cf9 1877 return chrec_convert (chrec_type (chrec), op, NULL);
8ceddf57 1878 }
1879
b6eab06c 1880 case BIT_NOT_EXPR:
1881 {
1882 /* Handle ~X as -1 - X. */
1883 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1884 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1885 build_int_cst (TREE_TYPE (chrec), -1), op);
1886 }
1887
8ceddf57 1888 case INTEGER_CST:
1889 return chrec;
c782188f 1890
8ceddf57 1891 default:
1892 gcc_unreachable ();
1893 return NULL_TREE;
1894 }
bc3c8ad4 1895}
1896
1897#define FLOOR_DIV(x,y) ((x) / (y))
1898
48e1416a 1899/* Solves the special case of the Diophantine equation:
bc3c8ad4 1900 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1901
1902 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1903 number of iterations that loops X and Y run. The overlaps will be
1904 constructed as evolutions in dimension DIM. */
2146e26d 1905
1906static void
48e1416a 1907compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
87da4f2e 1908 affine_fn *overlaps_a,
48e1416a 1909 affine_fn *overlaps_b,
bc3c8ad4 1910 tree *last_conflicts, int dim)
1911{
1912 if (((step_a > 0 && step_b > 0)
1913 || (step_a < 0 && step_b < 0)))
1914 {
1915 int step_overlaps_a, step_overlaps_b;
1916 int gcd_steps_a_b, last_conflict, tau2;
1917
1918 gcd_steps_a_b = gcd (step_a, step_b);
1919 step_overlaps_a = step_b / gcd_steps_a_b;
1920 step_overlaps_b = step_a / gcd_steps_a_b;
1921
e781c550 1922 if (niter > 0)
1923 {
1924 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1925 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1926 last_conflict = tau2;
1927 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1928 }
1929 else
1930 *last_conflicts = chrec_dont_know;
bc3c8ad4 1931
48e1416a 1932 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
87da4f2e 1933 build_int_cst (NULL_TREE,
1934 step_overlaps_a));
48e1416a 1935 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1936 build_int_cst (NULL_TREE,
87da4f2e 1937 step_overlaps_b));
bc3c8ad4 1938 }
1939
1940 else
1941 {
87da4f2e 1942 *overlaps_a = affine_fn_cst (integer_zero_node);
1943 *overlaps_b = affine_fn_cst (integer_zero_node);
bc3c8ad4 1944 *last_conflicts = integer_zero_node;
1945 }
1946}
1947
bc3c8ad4 1948/* Solves the special case of a Diophantine equation where CHREC_A is
1949 an affine bivariate function, and CHREC_B is an affine univariate
48e1416a 1950 function. For example,
bc3c8ad4 1951
1952 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
48e1416a 1953
1954 has the following overlapping functions:
bc3c8ad4 1955
1956 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1957 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1958 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1959
0975351b 1960 FORNOW: This is a specialized implementation for a case occurring in
bc3c8ad4 1961 a common benchmark. Implement the general algorithm. */
1962
1963static void
48e1416a 1964compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
87da4f2e 1965 conflict_function **overlaps_a,
48e1416a 1966 conflict_function **overlaps_b,
bc3c8ad4 1967 tree *last_conflicts)
2146e26d 1968{
bc3c8ad4 1969 bool xz_p, yz_p, xyz_p;
1970 int step_x, step_y, step_z;
d500fef3 1971 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
87da4f2e 1972 affine_fn overlaps_a_xz, overlaps_b_xz;
1973 affine_fn overlaps_a_yz, overlaps_b_yz;
1974 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1975 affine_fn ova1, ova2, ovb;
1976 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
bc3c8ad4 1977
58f77a87 1978 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1979 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1980 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
bc3c8ad4 1981
48e1416a 1982 niter_x =
4ee81b10 1983 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1984 false);
1985 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1986 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
48e1416a 1987
d500fef3 1988 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
bc3c8ad4 1989 {
6b421feb 1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
48e1416a 1992
87da4f2e 1993 *overlaps_a = conflict_fn_not_known ();
1994 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 1995 *last_conflicts = chrec_dont_know;
1996 return;
1997 }
1998
bc3c8ad4 1999 niter = MIN (niter_x, niter_z);
2000 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2001 &overlaps_a_xz,
2002 &overlaps_b_xz,
2003 &last_conflicts_xz, 1);
2004 niter = MIN (niter_y, niter_z);
2005 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2006 &overlaps_a_yz,
2007 &overlaps_b_yz,
2008 &last_conflicts_yz, 2);
2009 niter = MIN (niter_x, niter_z);
2010 niter = MIN (niter_y, niter);
2011 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2012 &overlaps_a_xyz,
2013 &overlaps_b_xyz,
2014 &last_conflicts_xyz, 3);
2015
2016 xz_p = !integer_zerop (last_conflicts_xz);
2017 yz_p = !integer_zerop (last_conflicts_yz);
2018 xyz_p = !integer_zerop (last_conflicts_xyz);
2019
2020 if (xz_p || yz_p || xyz_p)
2021 {
87da4f2e 2022 ova1 = affine_fn_cst (integer_zero_node);
2023 ova2 = affine_fn_cst (integer_zero_node);
2024 ovb = affine_fn_cst (integer_zero_node);
bc3c8ad4 2025 if (xz_p)
2026 {
87da4f2e 2027 affine_fn t0 = ova1;
2028 affine_fn t2 = ovb;
2029
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2031 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2032 affine_fn_free (t0);
2033 affine_fn_free (t2);
bc3c8ad4 2034 *last_conflicts = last_conflicts_xz;
2035 }
2036 if (yz_p)
2037 {
87da4f2e 2038 affine_fn t0 = ova2;
2039 affine_fn t2 = ovb;
2040
2041 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2042 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2043 affine_fn_free (t0);
2044 affine_fn_free (t2);
bc3c8ad4 2045 *last_conflicts = last_conflicts_yz;
2046 }
2047 if (xyz_p)
2048 {
87da4f2e 2049 affine_fn t0 = ova1;
2050 affine_fn t2 = ova2;
2051 affine_fn t4 = ovb;
2052
2053 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2054 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2055 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2056 affine_fn_free (t0);
2057 affine_fn_free (t2);
2058 affine_fn_free (t4);
bc3c8ad4 2059 *last_conflicts = last_conflicts_xyz;
2060 }
87da4f2e 2061 *overlaps_a = conflict_fn (2, ova1, ova2);
2062 *overlaps_b = conflict_fn (1, ovb);
bc3c8ad4 2063 }
2064 else
2065 {
87da4f2e 2066 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2067 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
bc3c8ad4 2068 *last_conflicts = integer_zero_node;
2069 }
87da4f2e 2070
2071 affine_fn_free (overlaps_a_xz);
2072 affine_fn_free (overlaps_b_xz);
2073 affine_fn_free (overlaps_a_yz);
2074 affine_fn_free (overlaps_b_yz);
2075 affine_fn_free (overlaps_a_xyz);
2076 affine_fn_free (overlaps_b_xyz);
2146e26d 2077}
2078
e01f9f1f 2079/* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2080
2081static void
2082lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2083 int size)
2084{
2085 memcpy (vec2, vec1, size * sizeof (*vec1));
2086}
2087
2088/* Copy the elements of M x N matrix MAT1 to MAT2. */
2089
2090static void
2091lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2092 int m, int n)
2093{
2094 int i;
2095
2096 for (i = 0; i < m; i++)
2097 lambda_vector_copy (mat1[i], mat2[i], n);
2098}
2099
2100/* Store the N x N identity matrix in MAT. */
2101
2102static void
2103lambda_matrix_id (lambda_matrix mat, int size)
2104{
2105 int i, j;
2106
2107 for (i = 0; i < size; i++)
2108 for (j = 0; j < size; j++)
2109 mat[i][j] = (i == j) ? 1 : 0;
2110}
2111
2112/* Return the first nonzero element of vector VEC1 between START and N.
2113 We must have START <= N. Returns N if VEC1 is the zero vector. */
2114
2115static int
2116lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2117{
2118 int j = start;
2119 while (j < n && vec1[j] == 0)
2120 j++;
2121 return j;
2122}
2123
2124/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2125 R2 = R2 + CONST1 * R1. */
2126
2127static void
2128lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2129{
2130 int i;
2131
2132 if (const1 == 0)
2133 return;
2134
2135 for (i = 0; i < n; i++)
2136 mat[r2][i] += const1 * mat[r1][i];
2137}
2138
2139/* Swap rows R1 and R2 in matrix MAT. */
2140
2141static void
2142lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2143{
2144 lambda_vector row;
2145
2146 row = mat[r1];
2147 mat[r1] = mat[r2];
2148 mat[r2] = row;
2149}
2150
2151/* Multiply vector VEC1 of length SIZE by a constant CONST1,
2152 and store the result in VEC2. */
2153
2154static void
2155lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2156 int size, int const1)
2157{
2158 int i;
2159
2160 if (const1 == 0)
2161 lambda_vector_clear (vec2, size);
2162 else
2163 for (i = 0; i < size; i++)
2164 vec2[i] = const1 * vec1[i];
2165}
2166
2167/* Negate vector VEC1 with length SIZE and store it in VEC2. */
2168
2169static void
2170lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2171 int size)
2172{
2173 lambda_vector_mult_const (vec1, vec2, size, -1);
2174}
2175
2176/* Negate row R1 of matrix MAT which has N columns. */
2177
2178static void
2179lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2180{
2181 lambda_vector_negate (mat[r1], mat[r1], n);
2182}
2183
2184/* Return true if two vectors are equal. */
2185
2186static bool
2187lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2188{
2189 int i;
2190 for (i = 0; i < size; i++)
2191 if (vec1[i] != vec2[i])
2192 return false;
2193 return true;
2194}
2195
2196/* Given an M x N integer matrix A, this function determines an M x
2197 M unimodular matrix U, and an M x N echelon matrix S such that
2198 "U.A = S". This decomposition is also known as "right Hermite".
2199
2200 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2201 Restructuring Compilers" Utpal Banerjee. */
2202
2203static void
2204lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2205 lambda_matrix S, lambda_matrix U)
2206{
2207 int i, j, i0 = 0;
2208
2209 lambda_matrix_copy (A, S, m, n);
2210 lambda_matrix_id (U, m);
2211
2212 for (j = 0; j < n; j++)
2213 {
2214 if (lambda_vector_first_nz (S[j], m, i0) < m)
2215 {
2216 ++i0;
2217 for (i = m - 1; i >= i0; i--)
2218 {
2219 while (S[i][j] != 0)
2220 {
2221 int sigma, factor, a, b;
2222
2223 a = S[i-1][j];
2224 b = S[i][j];
2225 sigma = (a * b < 0) ? -1: 1;
2226 a = abs (a);
2227 b = abs (b);
2228 factor = sigma * (a / b);
2229
2230 lambda_matrix_row_add (S, n, i, i-1, -factor);
2231 lambda_matrix_row_exchange (S, i, i-1);
2232
2233 lambda_matrix_row_add (U, m, i, i-1, -factor);
2234 lambda_matrix_row_exchange (U, i, i-1);
2235 }
2236 }
2237 }
2238 }
2239}
2240
2146e26d 2241/* Determines the overlapping elements due to accesses CHREC_A and
6b421feb 2242 CHREC_B, that are affine functions. This function cannot handle
2243 symbolic evolution functions, ie. when initial conditions are
2244 parameters, because it uses lambda matrices of integers. */
2146e26d 2245
2246static void
48e1416a 2247analyze_subscript_affine_affine (tree chrec_a,
2146e26d 2248 tree chrec_b,
48e1416a 2249 conflict_function **overlaps_a,
2250 conflict_function **overlaps_b,
bc3c8ad4 2251 tree *last_conflicts)
2146e26d 2252{
bc3c8ad4 2253 unsigned nb_vars_a, nb_vars_b, dim;
4ee81b10 2254 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
bc3c8ad4 2255 lambda_matrix A, U, S;
1e33ad50 2256 struct obstack scratch_obstack;
bc3c8ad4 2257
f84a688a 2258 if (eq_evolutions_p (chrec_a, chrec_b))
b6d41046 2259 {
f84a688a 2260 /* The accessed index overlaps for each iteration in the
2261 loop. */
87da4f2e 2262 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2263 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
b6d41046 2264 *last_conflicts = chrec_dont_know;
2265 return;
2266 }
2146e26d 2267 if (dump_file && (dump_flags & TDF_DETAILS))
2268 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
48e1416a 2269
2146e26d 2270 /* For determining the initial intersection, we have to solve a
2271 Diophantine equation. This is the most time consuming part.
48e1416a 2272
2146e26d 2273 For answering to the question: "Is there a dependence?" we have
2274 to prove that there exists a solution to the Diophantine
2275 equation, and that the solution is in the iteration domain,
5c9dae64 2276 i.e. the solution is positive or zero, and that the solution
2146e26d 2277 happens before the upper bound loop.nb_iterations. Otherwise
2278 there is no dependence. This function outputs a description of
2279 the iterations that hold the intersections. */
2280
bc3c8ad4 2281 nb_vars_a = nb_vars_in_chrec (chrec_a);
2282 nb_vars_b = nb_vars_in_chrec (chrec_b);
2283
1e33ad50 2284 gcc_obstack_init (&scratch_obstack);
2285
bc3c8ad4 2286 dim = nb_vars_a + nb_vars_b;
1e33ad50 2287 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2288 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2289 S = lambda_matrix_new (dim, 1, &scratch_obstack);
bc3c8ad4 2290
8ceddf57 2291 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2292 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
bc3c8ad4 2293 gamma = init_b - init_a;
2294
2295 /* Don't do all the hard work of solving the Diophantine equation
48e1416a 2296 when we already know the solution: for example,
bc3c8ad4 2297 | {3, +, 1}_1
2298 | {3, +, 4}_2
2299 | gamma = 3 - 3 = 0.
48e1416a 2300 Then the first overlap occurs during the first iterations:
bc3c8ad4 2301 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2302 */
2303 if (gamma == 0)
2146e26d 2304 {
bc3c8ad4 2305 if (nb_vars_a == 1 && nb_vars_b == 1)
2146e26d 2306 {
4ee81b10 2307 HOST_WIDE_INT step_a, step_b;
d500fef3 2308 HOST_WIDE_INT niter, niter_a, niter_b;
87da4f2e 2309 affine_fn ova, ovb;
bc3c8ad4 2310
4ee81b10 2311 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2312 false);
2313 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2314 false);
bc3c8ad4 2315 niter = MIN (niter_a, niter_b);
58f77a87 2316 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2317 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
bc3c8ad4 2318
48e1416a 2319 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2320 &ova, &ovb,
bc3c8ad4 2321 last_conflicts, 1);
87da4f2e 2322 *overlaps_a = conflict_fn (1, ova);
2323 *overlaps_b = conflict_fn (1, ovb);
2146e26d 2324 }
bc3c8ad4 2325
2326 else if (nb_vars_a == 2 && nb_vars_b == 1)
2327 compute_overlap_steps_for_affine_1_2
2328 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2329
2330 else if (nb_vars_a == 1 && nb_vars_b == 2)
2331 compute_overlap_steps_for_affine_1_2
2332 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2333
2334 else
2146e26d 2335 {
6b421feb 2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
87da4f2e 2338 *overlaps_a = conflict_fn_not_known ();
2339 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 2340 *last_conflicts = chrec_dont_know;
2146e26d 2341 }
6b421feb 2342 goto end_analyze_subs_aa;
bc3c8ad4 2343 }
2344
2345 /* U.A = S */
2346 lambda_matrix_right_hermite (A, dim, 1, S, U);
2347
2348 if (S[0][0] < 0)
2349 {
2350 S[0][0] *= -1;
2351 lambda_matrix_row_negate (U, dim, 0);
2352 }
2353 gcd_alpha_beta = S[0][0];
2354
b44d1046 2355 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2356 but that is a quite strange case. Instead of ICEing, answer
2357 don't know. */
2358 if (gcd_alpha_beta == 0)
2359 {
87da4f2e 2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
b44d1046 2362 *last_conflicts = chrec_dont_know;
2363 goto end_analyze_subs_aa;
2364 }
2365
bc3c8ad4 2366 /* The classic "gcd-test". */
2367 if (!int_divides_p (gcd_alpha_beta, gamma))
2368 {
2369 /* The "gcd-test" has determined that there is no integer
2370 solution, i.e. there is no dependence. */
87da4f2e 2371 *overlaps_a = conflict_fn_no_dependence ();
2372 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 2373 *last_conflicts = integer_zero_node;
2374 }
2375
2376 /* Both access functions are univariate. This includes SIV and MIV cases. */
2377 else if (nb_vars_a == 1 && nb_vars_b == 1)
2378 {
2379 /* Both functions should have the same evolution sign. */
2380 if (((A[0][0] > 0 && -A[1][0] > 0)
2381 || (A[0][0] < 0 && -A[1][0] < 0)))
2146e26d 2382 {
2383 /* The solutions are given by:
48e1416a 2384 |
bc3c8ad4 2385 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2386 | [u21 u22] [y0]
48e1416a 2387
2146e26d 2388 For a given integer t. Using the following variables,
48e1416a 2389
2146e26d 2390 | i0 = u11 * gamma / gcd_alpha_beta
2391 | j0 = u12 * gamma / gcd_alpha_beta
2392 | i1 = u21
2393 | j1 = u22
48e1416a 2394
2146e26d 2395 the solutions are:
48e1416a 2396
2397 | x0 = i0 + i1 * t,
bc3c8ad4 2398 | y0 = j0 + j1 * t. */
e781c550 2399 HOST_WIDE_INT i0, j0, i1, j1;
bc3c8ad4 2400
2401 i0 = U[0][0] * gamma / gcd_alpha_beta;
2402 j0 = U[0][1] * gamma / gcd_alpha_beta;
2403 i1 = U[1][0];
2404 j1 = U[1][1];
2405
2406 if ((i1 == 0 && i0 < 0)
2407 || (j1 == 0 && j0 < 0))
2146e26d 2408 {
48e1416a 2409 /* There is no solution.
2410 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2411 falls in here, but for the moment we don't look at the
2146e26d 2412 upper bound of the iteration domain. */
87da4f2e 2413 *overlaps_a = conflict_fn_no_dependence ();
2414 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 2415 *last_conflicts = integer_zero_node;
e781c550 2416 goto end_analyze_subs_aa;
bc3c8ad4 2417 }
2418
e781c550 2419 if (i1 > 0 && j1 > 0)
2146e26d 2420 {
e781c550 2421 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2422 (get_chrec_loop (chrec_a), false);
2423 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2424 (get_chrec_loop (chrec_b), false);
2425 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2426
2427 /* (X0, Y0) is a solution of the Diophantine equation:
2428 "chrec_a (X0) = chrec_b (Y0)". */
2429 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2430 CEIL (-j0, j1));
2431 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2432 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2433
2434 /* (X1, Y1) is the smallest positive solution of the eq
2435 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2436 first conflict occurs. */
2437 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2438 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2439 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2440
2441 if (niter > 0)
2146e26d 2442 {
e781c550 2443 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2444 FLOOR_DIV (niter - j0, j1));
2445 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
bc3c8ad4 2446
e781c550 2447 /* If the overlap occurs outside of the bounds of the
2448 loop, there is no dependence. */
63ca8934 2449 if (x1 >= niter || y1 >= niter)
2146e26d 2450 {
e781c550 2451 *overlaps_a = conflict_fn_no_dependence ();
2452 *overlaps_b = conflict_fn_no_dependence ();
2453 *last_conflicts = integer_zero_node;
2454 goto end_analyze_subs_aa;
2146e26d 2455 }
2456 else
e781c550 2457 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2146e26d 2458 }
2146e26d 2459 else
e781c550 2460 *last_conflicts = chrec_dont_know;
2461
2462 *overlaps_a
2463 = conflict_fn (1,
2464 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2465 1,
2466 build_int_cst (NULL_TREE, i1)));
2467 *overlaps_b
2468 = conflict_fn (1,
2469 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2470 1,
2471 build_int_cst (NULL_TREE, j1)));
2472 }
2473 else
2474 {
2475 /* FIXME: For the moment, the upper bound of the
2476 iteration domain for i and j is not checked. */
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2146e26d 2482 }
2483 }
bc3c8ad4 2484 else
2485 {
6b421feb 2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
87da4f2e 2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 2490 *last_conflicts = chrec_dont_know;
2491 }
2146e26d 2492 }
2146e26d 2493 else
2494 {
6b421feb 2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
87da4f2e 2497 *overlaps_a = conflict_fn_not_known ();
2498 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 2499 *last_conflicts = chrec_dont_know;
2146e26d 2500 }
bc3c8ad4 2501
48e1416a 2502end_analyze_subs_aa:
1e33ad50 2503 obstack_free (&scratch_obstack, NULL);
2146e26d 2504 if (dump_file && (dump_flags & TDF_DETAILS))
2505 {
2506 fprintf (dump_file, " (overlaps_a = ");
87da4f2e 2507 dump_conflict_function (dump_file, *overlaps_a);
2146e26d 2508 fprintf (dump_file, ")\n (overlaps_b = ");
87da4f2e 2509 dump_conflict_function (dump_file, *overlaps_b);
2146e26d 2510 fprintf (dump_file, ")\n");
6b421feb 2511 fprintf (dump_file, ")\n");
2146e26d 2512 }
6b421feb 2513}
2514
2515/* Returns true when analyze_subscript_affine_affine can be used for
2516 determining the dependence relation between chrec_a and chrec_b,
2517 that contain symbols. This function modifies chrec_a and chrec_b
2518 such that the analysis result is the same, and such that they don't
48e1416a 2519 contain symbols, and then can safely be passed to the analyzer.
6b421feb 2520
2521 Example: The analysis of the following tuples of evolutions produce
2522 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2523 vs. {0, +, 1}_1
48e1416a 2524
6b421feb 2525 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2526 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2527*/
2528
2529static bool
2530can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2531{
b92381b6 2532 tree diff, type, left_a, left_b, right_b;
6b421feb 2533
2534 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2535 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2536 /* FIXME: For the moment not handled. Might be refined later. */
2537 return false;
2538
b92381b6 2539 type = chrec_type (*chrec_a);
2540 left_a = CHREC_LEFT (*chrec_a);
75a70cf9 2541 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
b92381b6 2542 diff = chrec_fold_minus (type, left_a, left_b);
2543
6b421feb 2544 if (!evolution_function_is_constant_p (diff))
2545 return false;
2546
2146e26d 2547 if (dump_file && (dump_flags & TDF_DETAILS))
6b421feb 2548 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2549
48e1416a 2550 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
6b421feb 2551 diff, CHREC_RIGHT (*chrec_a));
75a70cf9 2552 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
6b421feb 2553 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
7c010d6a 2554 build_int_cst (type, 0),
b92381b6 2555 right_b);
6b421feb 2556 return true;
2146e26d 2557}
2558
2559/* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2560 *OVERLAPS_B are initialized to the functions that describe the
2561 relation between the elements accessed twice by CHREC_A and
2562 CHREC_B. For k >= 0, the following property is verified:
2563
2564 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2565
2566static void
48e1416a 2567analyze_siv_subscript (tree chrec_a,
2146e26d 2568 tree chrec_b,
48e1416a 2569 conflict_function **overlaps_a,
2570 conflict_function **overlaps_b,
8ceddf57 2571 tree *last_conflicts,
2572 int loop_nest_num)
2146e26d 2573{
6b421feb 2574 dependence_stats.num_siv++;
48e1416a 2575
2146e26d 2576 if (dump_file && (dump_flags & TDF_DETAILS))
2577 fprintf (dump_file, "(analyze_siv_subscript \n");
48e1416a 2578
2146e26d 2579 if (evolution_function_is_constant_p (chrec_a)
8ceddf57 2580 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
48e1416a 2581 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
bc3c8ad4 2582 overlaps_a, overlaps_b, last_conflicts);
48e1416a 2583
8ceddf57 2584 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2146e26d 2585 && evolution_function_is_constant_p (chrec_b))
48e1416a 2586 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
bc3c8ad4 2587 overlaps_b, overlaps_a, last_conflicts);
48e1416a 2588
8ceddf57 2589 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2590 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
6b421feb 2591 {
2592 if (!chrec_contains_symbols (chrec_a)
2593 && !chrec_contains_symbols (chrec_b))
2594 {
48e1416a 2595 analyze_subscript_affine_affine (chrec_a, chrec_b,
2596 overlaps_a, overlaps_b,
6b421feb 2597 last_conflicts);
2598
87da4f2e 2599 if (CF_NOT_KNOWN_P (*overlaps_a)
2600 || CF_NOT_KNOWN_P (*overlaps_b))
6b421feb 2601 dependence_stats.num_siv_unimplemented++;
87da4f2e 2602 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2603 || CF_NO_DEPENDENCE_P (*overlaps_b))
6b421feb 2604 dependence_stats.num_siv_independent++;
2605 else
2606 dependence_stats.num_siv_dependent++;
2607 }
48e1416a 2608 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
6b421feb 2609 &chrec_b))
2610 {
48e1416a 2611 analyze_subscript_affine_affine (chrec_a, chrec_b,
2612 overlaps_a, overlaps_b,
6b421feb 2613 last_conflicts);
6b421feb 2614
87da4f2e 2615 if (CF_NOT_KNOWN_P (*overlaps_a)
2616 || CF_NOT_KNOWN_P (*overlaps_b))
6b421feb 2617 dependence_stats.num_siv_unimplemented++;
87da4f2e 2618 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2619 || CF_NO_DEPENDENCE_P (*overlaps_b))
6b421feb 2620 dependence_stats.num_siv_independent++;
2621 else
2622 dependence_stats.num_siv_dependent++;
2623 }
2624 else
2625 goto siv_subscript_dontknow;
2626 }
2627
2146e26d 2628 else
2629 {
6b421feb 2630 siv_subscript_dontknow:;
2631 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, "siv test failed: unimplemented.\n");
87da4f2e 2633 *overlaps_a = conflict_fn_not_known ();
2634 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 2635 *last_conflicts = chrec_dont_know;
6b421feb 2636 dependence_stats.num_siv_unimplemented++;
2146e26d 2637 }
48e1416a 2638
2146e26d 2639 if (dump_file && (dump_flags & TDF_DETAILS))
2640 fprintf (dump_file, ")\n");
2641}
2642
cabedb72 2643/* Returns false if we can prove that the greatest common divisor of the steps
2644 of CHREC does not divide CST, false otherwise. */
2146e26d 2645
2646static bool
7ecb5bb2 2647gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2146e26d 2648{
cabedb72 2649 HOST_WIDE_INT cd = 0, val;
2650 tree step;
6b421feb 2651
cabedb72 2652 if (!host_integerp (cst, 0))
2653 return true;
2654 val = tree_low_cst (cst, 0);
2655
2656 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2657 {
2658 step = CHREC_RIGHT (chrec);
2659 if (!host_integerp (step, 0))
2660 return true;
2661 cd = gcd (cd, tree_low_cst (step, 0));
2662 chrec = CHREC_LEFT (chrec);
2146e26d 2663 }
cabedb72 2664
2665 return val % cd == 0;
2146e26d 2666}
2667
8c7b1d8e 2668/* Analyze a MIV (Multiple Index Variable) subscript with respect to
2669 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2670 functions that describe the relation between the elements accessed
2671 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2672 is verified:
2146e26d 2673
2674 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2675
2676static void
48e1416a 2677analyze_miv_subscript (tree chrec_a,
2678 tree chrec_b,
2679 conflict_function **overlaps_a,
2680 conflict_function **overlaps_b,
8c7b1d8e 2681 tree *last_conflicts,
2682 struct loop *loop_nest)
2146e26d 2683{
9a3dad1c 2684 tree type, difference;
2685
6b421feb 2686 dependence_stats.num_miv++;
2146e26d 2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 fprintf (dump_file, "(analyze_miv_subscript \n");
f84a688a 2689
9a3dad1c 2690 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
75a70cf9 2691 chrec_a = chrec_convert (type, chrec_a, NULL);
2692 chrec_b = chrec_convert (type, chrec_b, NULL);
9a3dad1c 2693 difference = chrec_fold_minus (type, chrec_a, chrec_b);
48e1416a 2694
f84a688a 2695 if (eq_evolutions_p (chrec_a, chrec_b))
2146e26d 2696 {
2697 /* Access functions are the same: all the elements are accessed
2698 in the same order. */
87da4f2e 2699 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2700 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
d500fef3 2701 *last_conflicts = estimated_loop_iterations_tree
2702 (get_chrec_loop (chrec_a), true);
6b421feb 2703 dependence_stats.num_miv_dependent++;
2146e26d 2704 }
48e1416a 2705
2146e26d 2706 else if (evolution_function_is_constant_p (difference)
2707 /* For the moment, the following is verified:
8c7b1d8e 2708 evolution_function_is_affine_multivariate_p (chrec_a,
2709 loop_nest->num) */
cabedb72 2710 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2146e26d 2711 {
2712 /* testsuite/.../ssa-chrec-33.c
48e1416a 2713 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2714
cabedb72 2715 The difference is 1, and all the evolution steps are multiples
2716 of 2, consequently there are no overlapping elements. */
87da4f2e 2717 *overlaps_a = conflict_fn_no_dependence ();
2718 *overlaps_b = conflict_fn_no_dependence ();
bc3c8ad4 2719 *last_conflicts = integer_zero_node;
6b421feb 2720 dependence_stats.num_miv_independent++;
2146e26d 2721 }
48e1416a 2722
8c7b1d8e 2723 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
6b421feb 2724 && !chrec_contains_symbols (chrec_a)
8c7b1d8e 2725 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
6b421feb 2726 && !chrec_contains_symbols (chrec_b))
2146e26d 2727 {
2728 /* testsuite/.../ssa-chrec-35.c
2729 {0, +, 1}_2 vs. {0, +, 1}_3
2730 the overlapping elements are respectively located at iterations:
48e1416a 2731 {0, +, 1}_x and {0, +, 1}_x,
2732 in other words, we have the equality:
bc3c8ad4 2733 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
48e1416a 2734
2735 Other examples:
2736 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
bc3c8ad4 2737 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2738
48e1416a 2739 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
bc3c8ad4 2740 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2146e26d 2741 */
48e1416a 2742 analyze_subscript_affine_affine (chrec_a, chrec_b,
bc3c8ad4 2743 overlaps_a, overlaps_b, last_conflicts);
6b421feb 2744
87da4f2e 2745 if (CF_NOT_KNOWN_P (*overlaps_a)
2746 || CF_NOT_KNOWN_P (*overlaps_b))
6b421feb 2747 dependence_stats.num_miv_unimplemented++;
87da4f2e 2748 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2749 || CF_NO_DEPENDENCE_P (*overlaps_b))
6b421feb 2750 dependence_stats.num_miv_independent++;
2751 else
2752 dependence_stats.num_miv_dependent++;
2146e26d 2753 }
48e1416a 2754
2146e26d 2755 else
2756 {
2757 /* When the analysis is too difficult, answer "don't know". */
6b421feb 2758 if (dump_file && (dump_flags & TDF_DETAILS))
2759 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2760
87da4f2e 2761 *overlaps_a = conflict_fn_not_known ();
2762 *overlaps_b = conflict_fn_not_known ();
bc3c8ad4 2763 *last_conflicts = chrec_dont_know;
6b421feb 2764 dependence_stats.num_miv_unimplemented++;
2146e26d 2765 }
48e1416a 2766
2146e26d 2767 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, ")\n");
2769}
2770
8c7b1d8e 2771/* Determines the iterations for which CHREC_A is equal to CHREC_B in
2772 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2773 OVERLAP_ITERATIONS_B are initialized with two functions that
2774 describe the iterations that contain conflicting elements.
48e1416a 2775
2146e26d 2776 Remark: For an integer k >= 0, the following equality is true:
48e1416a 2777
2146e26d 2778 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2779*/
2780
48e1416a 2781static void
2782analyze_overlapping_iterations (tree chrec_a,
2783 tree chrec_b,
2784 conflict_function **overlap_iterations_a,
2785 conflict_function **overlap_iterations_b,
8c7b1d8e 2786 tree *last_conflicts, struct loop *loop_nest)
2146e26d 2787{
8c7b1d8e 2788 unsigned int lnn = loop_nest->num;
2789
6b421feb 2790 dependence_stats.num_subscript_tests++;
48e1416a 2791
2146e26d 2792 if (dump_file && (dump_flags & TDF_DETAILS))
2793 {
2794 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2795 fprintf (dump_file, " (chrec_a = ");
2796 print_generic_expr (dump_file, chrec_a, 0);
6b421feb 2797 fprintf (dump_file, ")\n (chrec_b = ");
2146e26d 2798 print_generic_expr (dump_file, chrec_b, 0);
2799 fprintf (dump_file, ")\n");
2800 }
6b421feb 2801
2146e26d 2802 if (chrec_a == NULL_TREE
2803 || chrec_b == NULL_TREE
2804 || chrec_contains_undetermined (chrec_a)
6b421feb 2805 || chrec_contains_undetermined (chrec_b))
2146e26d 2806 {
6b421feb 2807 dependence_stats.num_subscript_undetermined++;
48e1416a 2808
87da4f2e 2809 *overlap_iterations_a = conflict_fn_not_known ();
2810 *overlap_iterations_b = conflict_fn_not_known ();
2146e26d 2811 }
6b421feb 2812
48e1416a 2813 /* If they are the same chrec, and are affine, they overlap
6b421feb 2814 on every iteration. */
2815 else if (eq_evolutions_p (chrec_a, chrec_b)
328218c2 2816 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2817 || operand_equal_p (chrec_a, chrec_b, 0)))
6b421feb 2818 {
2819 dependence_stats.num_same_subscript_function++;
87da4f2e 2820 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2821 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
6b421feb 2822 *last_conflicts = chrec_dont_know;
2823 }
2824
2825 /* If they aren't the same, and aren't affine, we can't do anything
328218c2 2826 yet. */
48e1416a 2827 else if ((chrec_contains_symbols (chrec_a)
6b421feb 2828 || chrec_contains_symbols (chrec_b))
8c7b1d8e 2829 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2830 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
6b421feb 2831 {
2832 dependence_stats.num_subscript_undetermined++;
87da4f2e 2833 *overlap_iterations_a = conflict_fn_not_known ();
2834 *overlap_iterations_b = conflict_fn_not_known ();
6b421feb 2835 }
2836
2146e26d 2837 else if (ziv_subscript_p (chrec_a, chrec_b))
48e1416a 2838 analyze_ziv_subscript (chrec_a, chrec_b,
bc3c8ad4 2839 overlap_iterations_a, overlap_iterations_b,
2840 last_conflicts);
48e1416a 2841
2146e26d 2842 else if (siv_subscript_p (chrec_a, chrec_b))
48e1416a 2843 analyze_siv_subscript (chrec_a, chrec_b,
2844 overlap_iterations_a, overlap_iterations_b,
8ceddf57 2845 last_conflicts, lnn);
48e1416a 2846
2146e26d 2847 else
48e1416a 2848 analyze_miv_subscript (chrec_a, chrec_b,
bc3c8ad4 2849 overlap_iterations_a, overlap_iterations_b,
8c7b1d8e 2850 last_conflicts, loop_nest);
48e1416a 2851
2146e26d 2852 if (dump_file && (dump_flags & TDF_DETAILS))
2853 {
2854 fprintf (dump_file, " (overlap_iterations_a = ");
87da4f2e 2855 dump_conflict_function (dump_file, *overlap_iterations_a);
2146e26d 2856 fprintf (dump_file, ")\n (overlap_iterations_b = ");
87da4f2e 2857 dump_conflict_function (dump_file, *overlap_iterations_b);
2146e26d 2858 fprintf (dump_file, ")\n");
6b421feb 2859 fprintf (dump_file, ")\n");
2146e26d 2860 }
2861}
2862
b44d1046 2863/* Helper function for uniquely inserting distance vectors. */
2146e26d 2864
b44d1046 2865static void
2866save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2867{
2868 unsigned i;
2869 lambda_vector v;
2146e26d 2870
48148244 2871 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
b44d1046 2872 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2873 return;
2146e26d 2874
b44d1046 2875 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2876}
2146e26d 2877
b44d1046 2878/* Helper function for uniquely inserting direction vectors. */
2879
2880static void
2881save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2146e26d 2882{
2883 unsigned i;
b44d1046 2884 lambda_vector v;
6b421feb 2885
48148244 2886 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
b44d1046 2887 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2888 return;
2889
2890 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2891}
2892
2893/* Add a distance of 1 on all the loops outer than INDEX. If we
2894 haven't yet determined a distance for this outer loop, push a new
2895 distance vector composed of the previous distance, and a distance
2896 of 1 for this outer loop. Example:
2897
2898 | loop_1
2899 | loop_2
2900 | A[10]
2901 | endloop_2
2902 | endloop_1
2903
2904 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2905 save (0, 1), then we have to save (1, 0). */
2906
2907static void
2908add_outer_distances (struct data_dependence_relation *ddr,
2909 lambda_vector dist_v, int index)
2910{
2911 /* For each outer loop where init_v is not set, the accesses are
2912 in dependence of distance 1 in the loop. */
2913 while (--index >= 0)
2914 {
2915 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2916 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2917 save_v[index] = 1;
2918 save_dist_v (ddr, save_v);
2919 }
2920}
2921
2922/* Return false when fail to represent the data dependence as a
2923 distance vector. INIT_B is set to true when a component has been
2924 added to the distance vector DIST_V. INDEX_CARRY is then set to
2925 the index in DIST_V that carries the dependence. */
2926
2927static bool
2928build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2929 struct data_reference *ddr_a,
2930 struct data_reference *ddr_b,
2931 lambda_vector dist_v, bool *init_b,
2932 int *index_carry)
2933{
2934 unsigned i;
2935 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
6b421feb 2936
6b6f234c 2937 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2146e26d 2938 {
bc3c8ad4 2939 tree access_fn_a, access_fn_b;
6b6f234c 2940 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2146e26d 2941
2942 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
bc3c8ad4 2943 {
2944 non_affine_dependence_relation (ddr);
b44d1046 2945 return false;
bc3c8ad4 2946 }
2947
b44d1046 2948 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2949 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2146e26d 2950
48e1416a 2951 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
bc3c8ad4 2952 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2146e26d 2953 {
b44d1046 2954 int dist, index;
ba3fc4d0 2955 int var_a = CHREC_VARIABLE (access_fn_a);
2956 int var_b = CHREC_VARIABLE (access_fn_b);
b44d1046 2957
ba3fc4d0 2958 if (var_a != var_b
2959 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
bc3c8ad4 2960 {
2961 non_affine_dependence_relation (ddr);
b44d1046 2962 return false;
bc3c8ad4 2963 }
48e1416a 2964
58f77a87 2965 dist = int_cst_value (SUB_DISTANCE (subscript));
ba3fc4d0 2966 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2967 *index_carry = MIN (index, *index_carry);
2146e26d 2968
b44d1046 2969 /* This is the subscript coupling test. If we have already
2970 recorded a distance for this loop (a distance coming from
2971 another subscript), it should be the same. For example,
2972 in the following code, there is no dependence:
2973
2146e26d 2974 | loop i = 0, N, 1
2975 | T[i+1][i] = ...
2976 | ... = T[i][i]
2977 | endloop
b44d1046 2978 */
2979 if (init_v[index] != 0 && dist_v[index] != dist)
2146e26d 2980 {
6b6f234c 2981 finalize_ddr_dependent (ddr, chrec_known);
b44d1046 2982 return false;
2146e26d 2983 }
2984
b44d1046 2985 dist_v[index] = dist;
2986 init_v[index] = 1;
2987 *init_b = true;
2988 }
9c77efff 2989 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
b44d1046 2990 {
2991 /* This can be for example an affine vs. constant dependence
2992 (T[i] vs. T[3]) that is not an affine dependence and is
2993 not representable as a distance vector. */
2994 non_affine_dependence_relation (ddr);
2995 return false;
2146e26d 2996 }
2997 }
1532ec98 2998
b44d1046 2999 return true;
3000}
1532ec98 3001
2a21a5df 3002/* Return true when the DDR contains only constant access functions. */
3003
3004static bool
7ecb5bb2 3005constant_access_functions (const struct data_dependence_relation *ddr)
2a21a5df 3006{
3007 unsigned i;
3008
3009 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3010 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3011 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3012 return false;
3013
3014 return true;
3015}
3016
b44d1046 3017/* Helper function for the case where DDR_A and DDR_B are the same
bfe20447 3018 multivariate access function with a constant step. For an example
3019 see pr34635-1.c. */
bc3c8ad4 3020
b44d1046 3021static void
3022add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3023{
3024 int x_1, x_2;
3025 tree c_1 = CHREC_LEFT (c_2);
3026 tree c_0 = CHREC_LEFT (c_1);
3027 lambda_vector dist_v;
8fb896c4 3028 int v1, v2, cd;
bc3c8ad4 3029
bb11a0e8 3030 /* Polynomials with more than 2 variables are not handled yet. When
3031 the evolution steps are parameters, it is not possible to
3032 represent the dependence using classical distance vectors. */
3033 if (TREE_CODE (c_0) != INTEGER_CST
3034 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3035 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3036 {
3037 DDR_AFFINE_P (ddr) = false;
b44d1046 3038 return;
3039 }
1532ec98 3040
b44d1046 3041 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3042 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
1532ec98 3043
b44d1046 3044 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3045 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
58f77a87 3046 v1 = int_cst_value (CHREC_RIGHT (c_1));
3047 v2 = int_cst_value (CHREC_RIGHT (c_2));
8fb896c4 3048 cd = gcd (v1, v2);
3049 v1 /= cd;
3050 v2 /= cd;
3051
3052 if (v2 < 0)
3053 {
3054 v2 = -v2;
3055 v1 = -v1;
3056 }
3057
3058 dist_v[x_1] = v2;
3059 dist_v[x_2] = -v1;
b44d1046 3060 save_dist_v (ddr, dist_v);
1532ec98 3061
b44d1046 3062 add_outer_distances (ddr, dist_v, x_1);
3063}
1532ec98 3064
b44d1046 3065/* Helper function for the case where DDR_A and DDR_B are the same
3066 access functions. */
ffe944fa 3067
b44d1046 3068static void
3069add_other_self_distances (struct data_dependence_relation *ddr)
3070{
3071 lambda_vector dist_v;
3072 unsigned i;
3073 int index_carry = DDR_NB_LOOPS (ddr);
1532ec98 3074
b44d1046 3075 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
ffe944fa 3076 {
b44d1046 3077 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
1532ec98 3078
b44d1046 3079 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
1532ec98 3080 {
b44d1046 3081 if (!evolution_function_is_univariate_p (access_fun))
3082 {
3083 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3084 {
3085 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3086 return;
3087 }
3088
bfe20447 3089 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3090
3091 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3092 add_multivariate_self_dist (ddr, access_fun);
3093 else
3094 /* The evolution step is not constant: it varies in
3095 the outer loop, so this cannot be represented by a
3096 distance vector. For example in pr34635.c the
3097 evolution is {0, +, {0, +, 4}_1}_2. */
3098 DDR_AFFINE_P (ddr) = false;
3099
b44d1046 3100 return;
3101 }
3102
3103 index_carry = MIN (index_carry,
3104 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3105 DDR_LOOP_NEST (ddr)));
1532ec98 3106 }
ffe944fa 3107 }
3108
b44d1046 3109 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3110 add_outer_distances (ddr, dist_v, index_carry);
2146e26d 3111}
3112
2a21a5df 3113static void
3114insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3115{
3116 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3117
3118 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3119 save_dist_v (ddr, dist_v);
3120}
3121
3122/* Adds a unit distance vector to DDR when there is a 0 overlap. This
3123 is the case for example when access functions are the same and
3124 equal to a constant, as in:
3125
3126 | loop_1
3127 | A[3] = ...
3128 | ... = A[3]
3129 | endloop_1
3130
3131 in which case the distance vectors are (0) and (1). */
3132
3133static void
3134add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3135{
3136 unsigned i, j;
3137
3138 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3139 {
3140 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3141 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3142 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3143
3144 for (j = 0; j < ca->n; j++)
3145 if (affine_function_zero_p (ca->fns[j]))
3146 {
3147 insert_innermost_unit_dist_vector (ddr);
3148 return;
3149 }
3150
3151 for (j = 0; j < cb->n; j++)
3152 if (affine_function_zero_p (cb->fns[j]))
3153 {
3154 insert_innermost_unit_dist_vector (ddr);
3155 return;
3156 }
3157 }
3158}
3159
b44d1046 3160/* Compute the classic per loop distance vector. DDR is the data
3161 dependence relation to build a vector from. Return false when fail
3162 to represent the data dependence as a distance vector. */
2146e26d 3163
557ef5d8 3164static bool
8c7b1d8e 3165build_classic_dist_vector (struct data_dependence_relation *ddr,
3166 struct loop *loop_nest)
2146e26d 3167{
1532ec98 3168 bool init_b = false;
b44d1046 3169 int index_carry = DDR_NB_LOOPS (ddr);
3170 lambda_vector dist_v;
1532ec98 3171
6b6f234c 3172 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
d534eb36 3173 return false;
b44d1046 3174
3175 if (same_access_functions (ddr))
2146e26d 3176 {
b44d1046 3177 /* Save the 0 vector. */
3178 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3179 save_dist_v (ddr, dist_v);
2146e26d 3180
2a21a5df 3181 if (constant_access_functions (ddr))
3182 add_distance_for_zero_overlaps (ddr);
3183
b44d1046 3184 if (DDR_NB_LOOPS (ddr) > 1)
3185 add_other_self_distances (ddr);
bc3c8ad4 3186
b44d1046 3187 return true;
3188 }
bc3c8ad4 3189
b44d1046 3190 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3191 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3192 dist_v, &init_b, &index_carry))
3193 return false;
bc3c8ad4 3194
b44d1046 3195 /* Save the distance vector if we initialized one. */
3196 if (init_b)
3197 {
3198 /* Verify a basic constraint: classic distance vectors should
3199 always be lexicographically positive.
3200
3201 Data references are collected in the order of execution of
3202 the program, thus for the following loop
3203
3204 | for (i = 1; i < 100; i++)
3205 | for (j = 1; j < 100; j++)
3206 | {
3207 | t = T[j+1][i-1]; // A
3208 | T[j][i] = t + 2; // B
3209 | }
3210
3211 references are collected following the direction of the wind:
3212 A then B. The data dependence tests are performed also
3213 following this order, such that we're looking at the distance
3214 separating the elements accessed by A from the elements later
3215 accessed by B. But in this example, the distance returned by
3216 test_dep (A, B) is lexicographically negative (-1, 1), that
3217 means that the access A occurs later than B with respect to
3218 the outer loop, ie. we're actually looking upwind. In this
3219 case we solve test_dep (B, A) looking downwind to the
3220 lexicographically positive solution, that returns the
3221 distance vector (1, -1). */
3222 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3223 {
3224 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
d534eb36 3225 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3226 loop_nest))
3227 return false;
b44d1046 3228 compute_subscript_distance (ddr);
d534eb36 3229 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3230 save_v, &init_b, &index_carry))
3231 return false;
b44d1046 3232 save_dist_v (ddr, save_v);
0ecb94cf 3233 DDR_REVERSED_P (ddr) = true;
b44d1046 3234
3235 /* In this case there is a dependence forward for all the
3236 outer loops:
3237
3238 | for (k = 1; k < 100; k++)
3239 | for (i = 1; i < 100; i++)
3240 | for (j = 1; j < 100; j++)
3241 | {
3242 | t = T[j+1][i-1]; // A
3243 | T[j][i] = t + 2; // B
3244 | }
3245
48e1416a 3246 the vectors are:
b44d1046 3247 (0, 1, -1)
3248 (1, 1, -1)
3249 (1, -1, 1)
3250 */
3251 if (DDR_NB_LOOPS (ddr) > 1)
3252 {
3253 add_outer_distances (ddr, save_v, index_carry);
3254 add_outer_distances (ddr, dist_v, index_carry);
bc3c8ad4 3255 }
b44d1046 3256 }
3257 else
3258 {
3259 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3260 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
bc3c8ad4 3261
b44d1046 3262 if (DDR_NB_LOOPS (ddr) > 1)
2146e26d 3263 {
b44d1046 3264 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
bc3c8ad4 3265
d534eb36 3266 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3267 DDR_A (ddr), loop_nest))
3268 return false;
b44d1046 3269 compute_subscript_distance (ddr);
d534eb36 3270 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3271 opposite_v, &init_b,
3272 &index_carry))
3273 return false;
bc3c8ad4 3274
d534eb36 3275 save_dist_v (ddr, save_v);
b44d1046 3276 add_outer_distances (ddr, dist_v, index_carry);
3277 add_outer_distances (ddr, opposite_v, index_carry);
2146e26d 3278 }
d534eb36 3279 else
3280 save_dist_v (ddr, save_v);
2146e26d 3281 }
3282 }
b44d1046 3283 else
3284 {
3285 /* There is a distance of 1 on all the outer loops: Example:
3286 there is a dependence of distance 1 on loop_1 for the array A.
1532ec98 3287
b44d1046 3288 | loop_1
3289 | A[5] = ...
3290 | endloop
3291 */
3292 add_outer_distances (ddr, dist_v,
3293 lambda_vector_first_nz (dist_v,
3294 DDR_NB_LOOPS (ddr), 0));
3295 }
3296
3297 if (dump_file && (dump_flags & TDF_DETAILS))
1532ec98 3298 {
b44d1046 3299 unsigned i;
1532ec98 3300
b44d1046 3301 fprintf (dump_file, "(build_classic_dist_vector\n");
3302 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3303 {
3304 fprintf (dump_file, " dist_vector = (");
3305 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3306 DDR_NB_LOOPS (ddr));
3307 fprintf (dump_file, " )\n");
3308 }
3309 fprintf (dump_file, ")\n");
1532ec98 3310 }
3311
b44d1046 3312 return true;
3313}
2146e26d 3314
b44d1046 3315/* Return the direction for a given distance.
3316 FIXME: Computing dir this way is suboptimal, since dir can catch
3317 cases that dist is unable to represent. */
bc3c8ad4 3318
b44d1046 3319static inline enum data_dependence_direction
3320dir_from_dist (int dist)
3321{
3322 if (dist > 0)
3323 return dir_positive;
3324 else if (dist < 0)
3325 return dir_negative;
3326 else
3327 return dir_equal;
3328}
1532ec98 3329
b44d1046 3330/* Compute the classic per loop direction vector. DDR is the data
3331 dependence relation to build a vector from. */
1532ec98 3332
b44d1046 3333static void
3334build_classic_dir_vector (struct data_dependence_relation *ddr)
3335{
3336 unsigned i, j;
3337 lambda_vector dist_v;
bc3c8ad4 3338
48148244 3339 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
b44d1046 3340 {
3341 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
bc3c8ad4 3342
b44d1046 3343 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3344 dir_v[j] = dir_from_dist (dist_v[j]);
1532ec98 3345
b44d1046 3346 save_dir_v (ddr, dir_v);
3347 }
2146e26d 3348}
3349
b44d1046 3350/* Helper function. Returns true when there is a dependence between
3351 data references DRA and DRB. */
6b421feb 3352
b44d1046 3353static bool
3354subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3355 struct data_reference *dra,
8c7b1d8e 3356 struct data_reference *drb,
3357 struct loop *loop_nest)
6b421feb 3358{
3359 unsigned int i;
6b421feb 3360 tree last_conflicts;
41c7a324 3361 struct subscript *subscript;
b44d1046 3362
41c7a324 3363 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3364 i++)
6b421feb 3365 {
87da4f2e 3366 conflict_function *overlaps_a, *overlaps_b;
41c7a324 3367
48e1416a 3368 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
6b421feb 3369 DR_ACCESS_FN (drb, i),
48e1416a 3370 &overlaps_a, &overlaps_b,
8c7b1d8e 3371 &last_conflicts, loop_nest);
41c7a324 3372
87da4f2e 3373 if (CF_NOT_KNOWN_P (overlaps_a)
3374 || CF_NOT_KNOWN_P (overlaps_b))
6b421feb 3375 {
3376 finalize_ddr_dependent (ddr, chrec_dont_know);
3377 dependence_stats.num_dependence_undetermined++;
87da4f2e 3378 free_conflict_function (overlaps_a);
3379 free_conflict_function (overlaps_b);
b44d1046 3380 return false;
6b421feb 3381 }
41c7a324 3382
87da4f2e 3383 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3384 || CF_NO_DEPENDENCE_P (overlaps_b))
6b421feb 3385 {
3386 finalize_ddr_dependent (ddr, chrec_known);
3387 dependence_stats.num_dependence_independent++;
87da4f2e 3388 free_conflict_function (overlaps_a);
3389 free_conflict_function (overlaps_b);
b44d1046 3390 return false;
6b421feb 3391 }
41c7a324 3392
6b421feb 3393 else
3394 {
e34df749 3395 if (SUB_CONFLICTS_IN_A (subscript))
3396 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3397 if (SUB_CONFLICTS_IN_B (subscript))
3398 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3399
6b421feb 3400 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3401 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3402 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3403 }
3404 }
3405
b44d1046 3406 return true;
3407}
3408
8c7b1d8e 3409/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
b44d1046 3410
3411static void
8c7b1d8e 3412subscript_dependence_tester (struct data_dependence_relation *ddr,
3413 struct loop *loop_nest)
b44d1046 3414{
48e1416a 3415
b44d1046 3416 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, "(subscript_dependence_tester \n");
48e1416a 3418
8c7b1d8e 3419 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
b44d1046 3420 dependence_stats.num_dependence_dependent++;
6b421feb 3421
6b421feb 3422 compute_subscript_distance (ddr);
8c7b1d8e 3423 if (build_classic_dist_vector (ddr, loop_nest))
b44d1046 3424 build_classic_dir_vector (ddr);
6b421feb 3425
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3427 fprintf (dump_file, ")\n");
3428}
3429
2146e26d 3430/* Returns true when all the access functions of A are affine or
8c7b1d8e 3431 constant with respect to LOOP_NEST. */
2146e26d 3432
48e1416a 3433static bool
7ecb5bb2 3434access_functions_are_affine_or_constant_p (const struct data_reference *a,
3435 const struct loop *loop_nest)
2146e26d 3436{
3437 unsigned int i;
355572cc 3438 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
e38f1ca4 3439 tree t;
355572cc 3440
48148244 3441 FOR_EACH_VEC_ELT (tree, fns, i, t)
8c7b1d8e 3442 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3443 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
2146e26d 3444 return false;
48e1416a 3445
2146e26d 3446 return true;
3447}
3448
355572cc 3449/* Initializes an equation for an OMEGA problem using the information
3450 contained in the ACCESS_FUN. Returns true when the operation
3451 succeeded.
3452
3453 PB is the omega constraint system.
3454 EQ is the number of the equation to be initialized.
3455 OFFSET is used for shifting the variables names in the constraints:
3456 a constrain is composed of 2 * the number of variables surrounding
3457 dependence accesses. OFFSET is set either to 0 for the first n variables,
3458 then it is set to n.
3459 ACCESS_FUN is expected to be an affine chrec. */
3460
3461static bool
48e1416a 3462init_omega_eq_with_af (omega_pb pb, unsigned eq,
3463 unsigned int offset, tree access_fun,
355572cc 3464 struct data_dependence_relation *ddr)
3465{
3466 switch (TREE_CODE (access_fun))
3467 {
3468 case POLYNOMIAL_CHREC:
3469 {
3470 tree left = CHREC_LEFT (access_fun);
3471 tree right = CHREC_RIGHT (access_fun);
3472 int var = CHREC_VARIABLE (access_fun);
3473 unsigned var_idx;
3474
3475 if (TREE_CODE (right) != INTEGER_CST)
3476 return false;
3477
3478 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
58f77a87 3479 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
355572cc 3480
3481 /* Compute the innermost loop index. */
3482 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3483
3484 if (offset == 0)
48e1416a 3485 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
58f77a87 3486 += int_cst_value (right);
355572cc 3487
3488 switch (TREE_CODE (left))
3489 {
3490 case POLYNOMIAL_CHREC:
3491 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3492
3493 case INTEGER_CST:
58f77a87 3494 pb->eqs[eq].coef[0] += int_cst_value (left);
355572cc 3495 return true;
3496
3497 default:
3498 return false;
3499 }
3500 }
3501
3502 case INTEGER_CST:
58f77a87 3503 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
355572cc 3504 return true;
3505
3506 default:
3507 return false;
3508 }
3509}
3510
3511/* As explained in the comments preceding init_omega_for_ddr, we have
3512 to set up a system for each loop level, setting outer loops
3513 variation to zero, and current loop variation to positive or zero.
3514 Save each lexico positive distance vector. */
3515
3516static void
3517omega_extract_distance_vectors (omega_pb pb,
3518 struct data_dependence_relation *ddr)
3519{
3520 int eq, geq;
3521 unsigned i, j;
3522 struct loop *loopi, *loopj;
3523 enum omega_result res;
3524
3525 /* Set a new problem for each loop in the nest. The basis is the
3526 problem that we have initialized until now. On top of this we
3527 add new constraints. */
48e1416a 3528 for (i = 0; i <= DDR_INNER_LOOP (ddr)
355572cc 3529 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3530 {
3531 int dist = 0;
3532 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3533 DDR_NB_LOOPS (ddr));
3534
3535 omega_copy_problem (copy, pb);
3536
3537 /* For all the outer loops "loop_j", add "dj = 0". */
3538 for (j = 0;
3539 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3540 {
3541 eq = omega_add_zero_eq (copy, omega_black);
3542 copy->eqs[eq].coef[j + 1] = 1;
3543 }
3544
3545 /* For "loop_i", add "0 <= di". */
3546 geq = omega_add_zero_geq (copy, omega_black);
3547 copy->geqs[geq].coef[i + 1] = 1;
3548
3549 /* Reduce the constraint system, and test that the current
3550 problem is feasible. */
3551 res = omega_simplify_problem (copy);
48e1416a 3552 if (res == omega_false
355572cc 3553 || res == omega_unknown
3554 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3555 goto next_problem;
3556
3557 for (eq = 0; eq < copy->num_subs; eq++)
3558 if (copy->subs[eq].key == (int) i + 1)
3559 {
3560 dist = copy->subs[eq].coef[0];
3561 goto found_dist;
3562 }
3563
3564 if (dist == 0)
3565 {
3566 /* Reinitialize problem... */
3567 omega_copy_problem (copy, pb);
3568 for (j = 0;
3569 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3570 {
3571 eq = omega_add_zero_eq (copy, omega_black);
3572 copy->eqs[eq].coef[j + 1] = 1;
3573 }
3574
3575 /* ..., but this time "di = 1". */
3576 eq = omega_add_zero_eq (copy, omega_black);
3577 copy->eqs[eq].coef[i + 1] = 1;
3578 copy->eqs[eq].coef[0] = -1;
3579
3580 res = omega_simplify_problem (copy);
48e1416a 3581 if (res == omega_false
355572cc 3582 || res == omega_unknown
3583 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3584 goto next_problem;
3585
3586 for (eq = 0; eq < copy->num_subs; eq++)
3587 if (copy->subs[eq].key == (int) i + 1)
3588 {
3589 dist = copy->subs[eq].coef[0];
3590 goto found_dist;
3591 }
3592 }
3593
3594 found_dist:;
3595 /* Save the lexicographically positive distance vector. */
3596 if (dist >= 0)
3597 {
3598 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3599 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3600
3601 dist_v[i] = dist;
3602
3603 for (eq = 0; eq < copy->num_subs; eq++)
3604 if (copy->subs[eq].key > 0)
3605 {
3606 dist = copy->subs[eq].coef[0];
3607 dist_v[copy->subs[eq].key - 1] = dist;
3608 }
3609
3610 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3611 dir_v[j] = dir_from_dist (dist_v[j]);
3612
3613 save_dist_v (ddr, dist_v);
3614 save_dir_v (ddr, dir_v);
3615 }
3616
3617 next_problem:;
3618 omega_free_problem (copy);
3619 }
3620}
3621
3622/* This is called for each subscript of a tuple of data references:
3623 insert an equality for representing the conflicts. */
3624
3625static bool
3626omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3627 struct data_dependence_relation *ddr,
3628 omega_pb pb, bool *maybe_dependent)
3629{
3630 int eq;
9a3dad1c 3631 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3632 TREE_TYPE (access_fun_b));
75a70cf9 3633 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3634 tree fun_b = chrec_convert (type, access_fun_b, NULL);
9a3dad1c 3635 tree difference = chrec_fold_minus (type, fun_a, fun_b);
ff2cd6d1 3636 tree minus_one;
355572cc 3637
3638 /* When the fun_a - fun_b is not constant, the dependence is not
3639 captured by the classic distance vector representation. */
3640 if (TREE_CODE (difference) != INTEGER_CST)
3641 return false;
3642
3643 /* ZIV test. */
3644 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3645 {
3646 /* There is no dependence. */
3647 *maybe_dependent = false;
3648 return true;
3649 }
3650
ff2cd6d1 3651 minus_one = build_int_cst (type, -1);
3652 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
355572cc 3653
3654 eq = omega_add_zero_eq (pb, omega_black);
3655 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3656 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3657 /* There is probably a dependence, but the system of
3658 constraints cannot be built: answer "don't know". */
3659 return false;
3660
3661 /* GCD test. */
3662 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
48e1416a 3663 && !int_divides_p (lambda_vector_gcd
355572cc 3664 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3665 2 * DDR_NB_LOOPS (ddr)),
3666 pb->eqs[eq].coef[0]))
3667 {
3668 /* There is no dependence. */
3669 *maybe_dependent = false;
3670 return true;
3671 }
3672
3673 return true;
3674}
3675
3676/* Helper function, same as init_omega_for_ddr but specialized for
3677 data references A and B. */
3678
3679static bool
3680init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3681 struct data_dependence_relation *ddr,
3682 omega_pb pb, bool *maybe_dependent)
3683{
3684 unsigned i;
3685 int ineq;
3686 struct loop *loopi;
3687 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3688
3689 /* Insert an equality per subscript. */
3690 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3691 {
3692 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3693 ddr, pb, maybe_dependent))
3694 return false;
3695 else if (*maybe_dependent == false)
3696 {
3697 /* There is no dependence. */
3698 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3699 return true;
3700 }
3701 }
3702
3703 /* Insert inequalities: constraints corresponding to the iteration
3704 domain, i.e. the loops surrounding the references "loop_x" and
3705 the distance variables "dx". The layout of the OMEGA
3706 representation is as follows:
3707 - coef[0] is the constant
3708 - coef[1..nb_loops] are the protected variables that will not be
3709 removed by the solver: the "dx"
3710 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3711 */
48e1416a 3712 for (i = 0; i <= DDR_INNER_LOOP (ddr)
355572cc 3713 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3714 {
4ee81b10 3715 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
355572cc 3716
3717 /* 0 <= loop_x */
3718 ineq = omega_add_zero_geq (pb, omega_black);
3719 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3720
3721 /* 0 <= loop_x + dx */
3722 ineq = omega_add_zero_geq (pb, omega_black);
3723 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3724 pb->geqs[ineq].coef[i + 1] = 1;
3725
3726 if (nbi != -1)
3727 {
3728 /* loop_x <= nb_iters */
3729 ineq = omega_add_zero_geq (pb, omega_black);
3730 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3731 pb->geqs[ineq].coef[0] = nbi;
3732
3733 /* loop_x + dx <= nb_iters */
3734 ineq = omega_add_zero_geq (pb, omega_black);
3735 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3736 pb->geqs[ineq].coef[i + 1] = -1;
3737 pb->geqs[ineq].coef[0] = nbi;
3738
3739 /* A step "dx" bigger than nb_iters is not feasible, so
3740 add "0 <= nb_iters + dx", */
3741 ineq = omega_add_zero_geq (pb, omega_black);
3742 pb->geqs[ineq].coef[i + 1] = 1;
3743 pb->geqs[ineq].coef[0] = nbi;
3744 /* and "dx <= nb_iters". */
3745 ineq = omega_add_zero_geq (pb, omega_black);
3746 pb->geqs[ineq].coef[i + 1] = -1;
3747 pb->geqs[ineq].coef[0] = nbi;
3748 }
3749 }
3750
3751 omega_extract_distance_vectors (pb, ddr);
3752
3753 return true;
3754}
3755
3756/* Sets up the Omega dependence problem for the data dependence
3757 relation DDR. Returns false when the constraint system cannot be
3758 built, ie. when the test answers "don't know". Returns true
3759 otherwise, and when independence has been proved (using one of the
3760 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3761 set MAYBE_DEPENDENT to true.
3762
3763 Example: for setting up the dependence system corresponding to the
48e1416a 3764 conflicting accesses
355572cc 3765
3766 | loop_i
3767 | loop_j
3768 | A[i, i+1] = ...
3769 | ... A[2*j, 2*(i + j)]
3770 | endloop_j
3771 | endloop_i
48e1416a 3772
355572cc 3773 the following constraints come from the iteration domain:
3774
3775 0 <= i <= Ni
3776 0 <= i + di <= Ni
3777 0 <= j <= Nj
3778 0 <= j + dj <= Nj
3779
3780 where di, dj are the distance variables. The constraints
3781 representing the conflicting elements are:
3782
3783 i = 2 * (j + dj)
3784 i + 1 = 2 * (i + di + j + dj)
3785
3786 For asking that the resulting distance vector (di, dj) be
3787 lexicographically positive, we insert the constraint "di >= 0". If
3788 "di = 0" in the solution, we fix that component to zero, and we
3789 look at the inner loops: we set a new problem where all the outer
3790 loop distances are zero, and fix this inner component to be
3791 positive. When one of the components is positive, we save that
3792 distance, and set a new problem where the distance on this loop is
3793 zero, searching for other distances in the inner loops. Here is
3794 the classic example that illustrates that we have to set for each
3795 inner loop a new problem:
3796
3797 | loop_1
3798 | loop_2
3799 | A[10]
3800 | endloop_2
3801 | endloop_1
3802
3803 we have to save two distances (1, 0) and (0, 1).
3804
3805 Given two array references, refA and refB, we have to set the
3806 dependence problem twice, refA vs. refB and refB vs. refA, and we
3807 cannot do a single test, as refB might occur before refA in the
3808 inner loops, and the contrary when considering outer loops: ex.
3809
3810 | loop_0
3811 | loop_1
3812 | loop_2
3813 | T[{1,+,1}_2][{1,+,1}_1] // refA
3814 | T[{2,+,1}_2][{0,+,1}_1] // refB
3815 | endloop_2
3816 | endloop_1
3817 | endloop_0
3818
3819 refB touches the elements in T before refA, and thus for the same
3820 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3821 but for successive loop_0 iterations, we have (1, -1, 1)
3822
3823 The Omega solver expects the distance variables ("di" in the
3824 previous example) to come first in the constraint system (as
3825 variables to be protected, or "safe" variables), the constraint
3826 system is built using the following layout:
3827
3828 "cst | distance vars | index vars".
3829*/
3830
3831static bool
3832init_omega_for_ddr (struct data_dependence_relation *ddr,
3833 bool *maybe_dependent)
3834{
3835 omega_pb pb;
3836 bool res = false;
3837
3838 *maybe_dependent = true;
3839
3840 if (same_access_functions (ddr))
3841 {
3842 unsigned j;
3843 lambda_vector dir_v;
3844
3845 /* Save the 0 vector. */
3846 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3847 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3848 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3849 dir_v[j] = dir_equal;
3850 save_dir_v (ddr, dir_v);
3851
3852 /* Save the dependences carried by outer loops. */
3853 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3854 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3855 maybe_dependent);
3856 omega_free_problem (pb);
3857 return res;
3858 }
3859
3860 /* Omega expects the protected variables (those that have to be kept
3861 after elimination) to appear first in the constraint system.
3862 These variables are the distance variables. In the following
3863 initialization we declare NB_LOOPS safe variables, and the total
3864 number of variables for the constraint system is 2*NB_LOOPS. */
3865 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3866 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3867 maybe_dependent);
3868 omega_free_problem (pb);
3869
3870 /* Stop computation if not decidable, or no dependence. */
3871 if (res == false || *maybe_dependent == false)
3872 return res;
3873
3874 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3875 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3876 maybe_dependent);
3877 omega_free_problem (pb);
3878
3879 return res;
3880}
3881
3882/* Return true when DDR contains the same information as that stored
3883 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3884
3885static bool
3886ddr_consistent_p (FILE *file,
3887 struct data_dependence_relation *ddr,
3888 VEC (lambda_vector, heap) *dist_vects,
3889 VEC (lambda_vector, heap) *dir_vects)
3890{
3891 unsigned int i, j;
3892
3893 /* If dump_file is set, output there. */
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3895 file = dump_file;
3896
3897 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3898 {
3899 lambda_vector b_dist_v;
3900 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3901 VEC_length (lambda_vector, dist_vects),
3902 DDR_NUM_DIST_VECTS (ddr));
3903
3904 fprintf (file, "Banerjee dist vectors:\n");
48148244 3905 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
355572cc 3906 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3907
3908 fprintf (file, "Omega dist vectors:\n");
3909 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3910 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3911
3912 fprintf (file, "data dependence relation:\n");
3913 dump_data_dependence_relation (file, ddr);
3914
3915 fprintf (file, ")\n");
3916 return false;
3917 }
3918
3919 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3920 {
3921 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3922 VEC_length (lambda_vector, dir_vects),
3923 DDR_NUM_DIR_VECTS (ddr));
3924 return false;
3925 }
3926
3927 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3928 {
3929 lambda_vector a_dist_v;
3930 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3931
3932 /* Distance vectors are not ordered in the same way in the DDR
3933 and in the DIST_VECTS: search for a matching vector. */
48148244 3934 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
355572cc 3935 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3936 break;
3937
3938 if (j == VEC_length (lambda_vector, dist_vects))
3939 {
3940 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3941 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3942 fprintf (file, "not found in Omega dist vectors:\n");
3943 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3944 fprintf (file, "data dependence relation:\n");
3945 dump_data_dependence_relation (file, ddr);
3946 fprintf (file, ")\n");
3947 }
3948 }
3949
3950 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3951 {
3952 lambda_vector a_dir_v;
3953 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3954
3955 /* Direction vectors are not ordered in the same way in the DDR
3956 and in the DIR_VECTS: search for a matching vector. */
48148244 3957 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
355572cc 3958 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3959 break;
3960
3961 if (j == VEC_length (lambda_vector, dist_vects))
3962 {
3963 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3964 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3965 fprintf (file, "not found in Omega dir vectors:\n");
3966 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3967 fprintf (file, "data dependence relation:\n");
3968 dump_data_dependence_relation (file, ddr);
3969 fprintf (file, ")\n");
3970 }
3971 }
3972
48e1416a 3973 return true;
355572cc 3974}
3975
8c7b1d8e 3976/* This computes the affine dependence relation between A and B with
3977 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3978 independence between two accesses, while CHREC_DONT_KNOW is used
3979 for representing the unknown relation.
48e1416a 3980
2146e26d 3981 Note that it is possible to stop the computation of the dependence
3982 relation the first time we detect a CHREC_KNOWN element for a given
3983 subscript. */
3984
6b421feb 3985static void
8c7b1d8e 3986compute_affine_dependence (struct data_dependence_relation *ddr,
3987 struct loop *loop_nest)
2146e26d 3988{
3989 struct data_reference *dra = DDR_A (ddr);
3990 struct data_reference *drb = DDR_B (ddr);
48e1416a 3991
2146e26d 3992 if (dump_file && (dump_flags & TDF_DETAILS))
3993 {
6b6f234c 3994 fprintf (dump_file, "(compute_affine_dependence\n");
2146e26d 3995 fprintf (dump_file, " (stmt_a = \n");
75a70cf9 3996 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
2146e26d 3997 fprintf (dump_file, ")\n (stmt_b = \n");
75a70cf9 3998 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
2146e26d 3999 fprintf (dump_file, ")\n");
4000 }
6b421feb 4001
2146e26d 4002 /* Analyze only when the dependence relation is not yet known. */
c127dd86 4003 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4004 && !DDR_SELF_REFERENCE (ddr))
2146e26d 4005 {
6b421feb 4006 dependence_stats.num_dependence_tests++;
4007
8c7b1d8e 4008 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4009 && access_functions_are_affine_or_constant_p (drb, loop_nest))
355572cc 4010 {
4011 if (flag_check_data_deps)
4012 {
4013 /* Compute the dependences using the first algorithm. */
8c7b1d8e 4014 subscript_dependence_tester (ddr, loop_nest);
355572cc 4015
4016 if (dump_file && (dump_flags & TDF_DETAILS))
4017 {
4018 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4019 dump_data_dependence_relation (dump_file, ddr);
4020 }
4021
4022 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4023 {
4024 bool maybe_dependent;
4025 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4026
4027 /* Save the result of the first DD analyzer. */
4028 dist_vects = DDR_DIST_VECTS (ddr);
4029 dir_vects = DDR_DIR_VECTS (ddr);
4030
4031 /* Reset the information. */
4032 DDR_DIST_VECTS (ddr) = NULL;
4033 DDR_DIR_VECTS (ddr) = NULL;
4034
4035 /* Compute the same information using Omega. */
4036 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4037 goto csys_dont_know;
4038
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4040 {
4041 fprintf (dump_file, "Omega Analyzer\n");
4042 dump_data_dependence_relation (dump_file, ddr);
4043 }
4044
4045 /* Check that we get the same information. */
4046 if (maybe_dependent)
4047 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4048 dir_vects));
4049 }
4050 }
4051 else
8c7b1d8e 4052 subscript_dependence_tester (ddr, loop_nest);
355572cc 4053 }
48e1416a 4054
2146e26d 4055 /* As a last case, if the dependence cannot be determined, or if
4056 the dependence is considered too difficult to determine, answer
4057 "don't know". */
4058 else
6b421feb 4059 {
355572cc 4060 csys_dont_know:;
6b421feb 4061 dependence_stats.num_dependence_undetermined++;
4062
4063 if (dump_file && (dump_flags & TDF_DETAILS))
4064 {
4065 fprintf (dump_file, "Data ref a:\n");
4066 dump_data_reference (dump_file, dra);
4067 fprintf (dump_file, "Data ref b:\n");
4068 dump_data_reference (dump_file, drb);
4069 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4070 }
4071 finalize_ddr_dependent (ddr, chrec_dont_know);
4072 }
2146e26d 4073 }
48e1416a 4074
2146e26d 4075 if (dump_file && (dump_flags & TDF_DETAILS))
4076 fprintf (dump_file, ")\n");
4077}
4078
02d46b9c 4079/* This computes the dependence relation for the same data
4080 reference into DDR. */
4081
4082static void
4083compute_self_dependence (struct data_dependence_relation *ddr)
4084{
4085 unsigned int i;
41c7a324 4086 struct subscript *subscript;
02d46b9c 4087
80bb306a 4088 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4089 return;
4090
41c7a324 4091 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4092 i++)
02d46b9c 4093 {
e34df749 4094 if (SUB_CONFLICTS_IN_A (subscript))
4095 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4096 if (SUB_CONFLICTS_IN_B (subscript))
4097 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4098
02d46b9c 4099 /* The accessed index overlaps for each iteration. */
87da4f2e 4100 SUB_CONFLICTS_IN_A (subscript)
e34df749 4101 = conflict_fn (1, affine_fn_cst (integer_zero_node));
87da4f2e 4102 SUB_CONFLICTS_IN_B (subscript)
e34df749 4103 = conflict_fn (1, affine_fn_cst (integer_zero_node));
02d46b9c 4104 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4105 }
02d46b9c 4106
6b421feb 4107 /* The distance vector is the zero vector. */
b44d1046 4108 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4109 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
6b421feb 4110}
544bab50 4111
b44d1046 4112/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4113 the data references in DATAREFS, in the LOOP_NEST. When
41c7a324 4114 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4115 relations. */
2146e26d 4116
48e1416a 4117void
41c7a324 4118compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
2af73e54 4119 VEC (ddr_p, heap) **dependence_relations,
b44d1046 4120 VEC (loop_p, heap) *loop_nest,
41c7a324 4121 bool compute_self_and_rr)
2146e26d 4122{
41c7a324 4123 struct data_dependence_relation *ddr;
4124 struct data_reference *a, *b;
4125 unsigned int i, j;
2146e26d 4126
48148244 4127 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
41c7a324 4128 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
9ff25603 4129 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
41c7a324 4130 {
4131 ddr = initialize_data_dependence_relation (a, b, loop_nest);
2af73e54 4132 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
37545e54 4133 if (loop_nest)
4134 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
41c7a324 4135 }
02d46b9c 4136
41c7a324 4137 if (compute_self_and_rr)
48148244 4138 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
2146e26d 4139 {
41c7a324 4140 ddr = initialize_data_dependence_relation (a, a, loop_nest);
2af73e54 4141 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
41c7a324 4142 compute_self_dependence (ddr);
2146e26d 4143 }
4144}
4145
faa56cf9 4146/* Stores the locations of memory references in STMT to REFERENCES. Returns
4147 true if STMT clobbers memory, false otherwise. */
4148
4149bool
75a70cf9 4150get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
faa56cf9 4151{
4152 bool clobbers_memory = false;
4153 data_ref_loc *ref;
75a70cf9 4154 tree *op0, *op1;
4155 enum gimple_code stmt_code = gimple_code (stmt);
faa56cf9 4156
4157 *references = NULL;
4158
4159 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4160 Calls have side-effects, except those to const or pure
4161 functions. */
75a70cf9 4162 if ((stmt_code == GIMPLE_CALL
4163 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4164 || (stmt_code == GIMPLE_ASM
4165 && gimple_asm_volatile_p (stmt)))
faa56cf9 4166 clobbers_memory = true;
4167
dd277d48 4168 if (!gimple_vuse (stmt))
faa56cf9 4169 return clobbers_memory;
4170
75a70cf9 4171 if (stmt_code == GIMPLE_ASSIGN)
faa56cf9 4172 {
ffb04365 4173 tree base;
75a70cf9 4174 op0 = gimple_assign_lhs_ptr (stmt);
4175 op1 = gimple_assign_rhs1_ptr (stmt);
48e1416a 4176
faa56cf9 4177 if (DECL_P (*op1)
ffb04365 4178 || (REFERENCE_CLASS_P (*op1)
4179 && (base = get_base_address (*op1))
4180 && TREE_CODE (base) != SSA_NAME))
faa56cf9 4181 {
4182 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4183 ref->pos = op1;
4184 ref->is_read = true;
4185 }
4186
4187 if (DECL_P (*op0)
dab9b858 4188 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
faa56cf9 4189 {
4190 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4191 ref->pos = op0;
4192 ref->is_read = false;
4193 }
4194 }
75a70cf9 4195 else if (stmt_code == GIMPLE_CALL)
faa56cf9 4196 {
75a70cf9 4197 unsigned i, n = gimple_call_num_args (stmt);
d97e22fb 4198
4199 for (i = 0; i < n; i++)
faa56cf9 4200 {
75a70cf9 4201 op0 = gimple_call_arg_ptr (stmt, i);
d97e22fb 4202
faa56cf9 4203 if (DECL_P (*op0)
dab9b858 4204 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
faa56cf9 4205 {
4206 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4207 ref->pos = op0;
4208 ref->is_read = true;
4209 }
4210 }
4211 }
4212
4213 return clobbers_memory;
4214}
4215
4216/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
80bb306a 4217 reference, returns false, otherwise returns true. NEST is the outermost
255b6be7 4218 loop of the loop nest in which the references should be analyzed. */
faa56cf9 4219
255b6be7 4220bool
75a70cf9 4221find_data_references_in_stmt (struct loop *nest, gimple stmt,
faa56cf9 4222 VEC (data_reference_p, heap) **datarefs)
4223{
4224 unsigned i;
4225 VEC (data_ref_loc, heap) *references;
4226 data_ref_loc *ref;
4227 bool ret = true;
4228 data_reference_p dr;
4229
4230 if (get_references_in_stmt (stmt, &references))
4231 {
4232 VEC_free (data_ref_loc, heap, references);
4233 return false;
4234 }
4235
48148244 4236 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
faa56cf9 4237 {
221a697e 4238 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4239 *ref->pos, stmt, ref->is_read);
ad4a85ad 4240 gcc_assert (dr != NULL);
48e1416a 4241
4242 /* FIXME -- data dependence analysis does not work correctly for objects
37545e54 4243 with invariant addresses in loop nests. Let us fail here until the
4244 problem is fixed. */
4245 if (dr_address_invariant_p (dr) && nest)
faa56cf9 4246 {
ad4a85ad 4247 free_data_ref (dr);
4248 if (dump_file && (dump_flags & TDF_DETAILS))
4249 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
faa56cf9 4250 ret = false;
4251 break;
4252 }
ad4a85ad 4253
4254 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
faa56cf9 4255 }
4256 VEC_free (data_ref_loc, heap, references);
4257 return ret;
4258}
4259
221a697e 4260/* Stores the data references in STMT to DATAREFS. If there is an
4261 unanalyzable reference, returns false, otherwise returns true.
4262 NEST is the outermost loop of the loop nest in which the references
4263 should be instantiated, LOOP is the loop in which the references
4264 should be analyzed. */
c794d738 4265
4266bool
221a697e 4267graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
c794d738 4268 VEC (data_reference_p, heap) **datarefs)
4269{
4270 unsigned i;
4271 VEC (data_ref_loc, heap) *references;
4272 data_ref_loc *ref;
4273 bool ret = true;
4274 data_reference_p dr;
4275
4276 if (get_references_in_stmt (stmt, &references))
4277 {
4278 VEC_free (data_ref_loc, heap, references);
4279 return false;
4280 }
4281
48148244 4282 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
c794d738 4283 {
221a697e 4284 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
c794d738 4285 gcc_assert (dr != NULL);
4286 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4287 }
4288
4289 VEC_free (data_ref_loc, heap, references);
4290 return ret;
4291}
4292
37545e54 4293/* Search the data references in LOOP, and record the information into
4294 DATAREFS. Returns chrec_dont_know when failing to analyze a
4295 difficult case, returns NULL_TREE otherwise. */
4296
4297static tree
4298find_data_references_in_bb (struct loop *loop, basic_block bb,
4299 VEC (data_reference_p, heap) **datarefs)
4300{
4301 gimple_stmt_iterator bsi;
4302
4303 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4304 {
4305 gimple stmt = gsi_stmt (bsi);
4306
4307 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4308 {
4309 struct data_reference *res;
4310 res = XCNEW (struct data_reference);
4311 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4312
4313 return chrec_dont_know;
4314 }
4315 }
4316
4317 return NULL_TREE;
4318}
4319
2146e26d 4320/* Search the data references in LOOP, and record the information into
4321 DATAREFS. Returns chrec_dont_know when failing to analyze a
4322 difficult case, returns NULL_TREE otherwise.
80bb306a 4323
557ef5d8 4324 TODO: This function should be made smarter so that it can handle address
4325 arithmetic as if they were array accesses, etc. */
2146e26d 4326
48e1416a 4327tree
41c7a324 4328find_data_references_in_loop (struct loop *loop,
04bd1b81 4329 VEC (data_reference_p, heap) **datarefs)
2146e26d 4330{
0332ea54 4331 basic_block bb, *bbs;
4332 unsigned int i;
bc3c8ad4 4333
ad4a85ad 4334 bbs = get_loop_body_in_dom_order (loop);
0332ea54 4335
4336 for (i = 0; i < loop->num_nodes; i++)
2146e26d 4337 {
0332ea54 4338 bb = bbs[i];
4339
37545e54 4340 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4341 {
4342 free (bbs);
4343 return chrec_dont_know;
4344 }
2146e26d 4345 }
0332ea54 4346 free (bbs);
4347
4a012ac6 4348 return NULL_TREE;
2146e26d 4349}
4350
b44d1046 4351/* Recursive helper function. */
4352
4353static bool
62761183 4354find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
b44d1046 4355{
4356 /* Inner loops of the nest should not contain siblings. Example:
4357 when there are two consecutive loops,
4358
4359 | loop_0
4360 | loop_1
4361 | A[{0, +, 1}_1]
4362 | endloop_1
4363 | loop_2
4364 | A[{0, +, 1}_2]
4365 | endloop_2
4366 | endloop_0
4367
4368 the dependence relation cannot be captured by the distance
4369 abstraction. */
4370 if (loop->next)
4371 return false;
2146e26d 4372
62761183 4373 VEC_safe_push (loop_p, heap, *loop_nest, loop);
b44d1046 4374 if (loop->inner)
4375 return find_loop_nest_1 (loop->inner, loop_nest);
4376 return true;
4377}
4378
4379/* Return false when the LOOP is not well nested. Otherwise return
4380 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4381 contain the loops from the outermost to the innermost, as they will
4382 appear in the classic distance vector. */
4383
5c205353 4384bool
62761183 4385find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
b44d1046 4386{
62761183 4387 VEC_safe_push (loop_p, heap, *loop_nest, loop);
b44d1046 4388 if (loop->inner)
4389 return find_loop_nest_1 (loop->inner, loop_nest);
4390 return true;
4391}
2146e26d 4392
b79b3386 4393/* Returns true when the data dependences have been computed, false otherwise.
4394 Given a loop nest LOOP, the following vectors are returned:
48e1416a 4395 DATAREFS is initialized to all the array elements contained in this loop,
4396 DEPENDENCE_RELATIONS contains the relations between the data references.
4397 Compute read-read and self relations if
516849c7 4398 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
2146e26d 4399
b79b3386 4400bool
48e1416a 4401compute_data_dependences_for_loop (struct loop *loop,
516849c7 4402 bool compute_self_and_read_read_dependences,
a8af2e86 4403 VEC (loop_p, heap) **loop_nest,
04bd1b81 4404 VEC (data_reference_p, heap) **datarefs,
4405 VEC (ddr_p, heap) **dependence_relations)
2146e26d 4406{
b79b3386 4407 bool res = true;
516849c7 4408
6b421feb 4409 memset (&dependence_stats, 0, sizeof (dependence_stats));
2146e26d 4410
48e1416a 4411 /* If the loop nest is not well formed, or one of the data references
b44d1046 4412 is not computable, give up without spending time to compute other
4413 dependences. */
80bb306a 4414 if (!loop
a8af2e86 4415 || !find_loop_nest (loop, loop_nest)
b44d1046 4416 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
2146e26d 4417 {
4418 struct data_dependence_relation *ddr;
4419
4420 /* Insert a single relation into dependence_relations:
4421 chrec_dont_know. */
a8af2e86 4422 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
04bd1b81 4423 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
b79b3386 4424 res = false;
2146e26d 4425 }
b44d1046 4426 else
a8af2e86 4427 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
41c7a324 4428 compute_self_and_read_read_dependences);
6b421feb 4429
4430 if (dump_file && (dump_flags & TDF_STATS))
2146e26d 4431 {
6b421feb 4432 fprintf (dump_file, "Dependence tester statistics:\n");
4433
48e1416a 4434 fprintf (dump_file, "Number of dependence tests: %d\n",
6b421feb 4435 dependence_stats.num_dependence_tests);
48e1416a 4436 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6b421feb 4437 dependence_stats.num_dependence_dependent);
48e1416a 4438 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6b421feb 4439 dependence_stats.num_dependence_independent);
48e1416a 4440 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6b421feb 4441 dependence_stats.num_dependence_undetermined);
4442
48e1416a 4443 fprintf (dump_file, "Number of subscript tests: %d\n",
6b421feb 4444 dependence_stats.num_subscript_tests);
48e1416a 4445 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6b421feb 4446 dependence_stats.num_subscript_undetermined);
48e1416a 4447 fprintf (dump_file, "Number of same subscript function: %d\n",
6b421feb 4448 dependence_stats.num_same_subscript_function);
4449
4450 fprintf (dump_file, "Number of ziv tests: %d\n",
4451 dependence_stats.num_ziv);
4452 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4453 dependence_stats.num_ziv_dependent);
4454 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4455 dependence_stats.num_ziv_independent);
4456 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
48e1416a 4457 dependence_stats.num_ziv_unimplemented);
6b421feb 4458
48e1416a 4459 fprintf (dump_file, "Number of siv tests: %d\n",
6b421feb 4460 dependence_stats.num_siv);
4461 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4462 dependence_stats.num_siv_dependent);
4463 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4464 dependence_stats.num_siv_independent);
4465 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4466 dependence_stats.num_siv_unimplemented);
4467
48e1416a 4468 fprintf (dump_file, "Number of miv tests: %d\n",
6b421feb 4469 dependence_stats.num_miv);
4470 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4471 dependence_stats.num_miv_dependent);
4472 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4473 dependence_stats.num_miv_independent);
4474 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4475 dependence_stats.num_miv_unimplemented);
b79b3386 4476 }
4477
4478 return res;
2146e26d 4479}
4480
48e1416a 4481/* Returns true when the data dependences for the basic block BB have been
37545e54 4482 computed, false otherwise.
48e1416a 4483 DATAREFS is initialized to all the array elements contained in this basic
37545e54 4484 block, DEPENDENCE_RELATIONS contains the relations between the data
4485 references. Compute read-read and self relations if
4486 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4487bool
4488compute_data_dependences_for_bb (basic_block bb,
4489 bool compute_self_and_read_read_dependences,
4490 VEC (data_reference_p, heap) **datarefs,
4491 VEC (ddr_p, heap) **dependence_relations)
4492{
4493 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4494 return false;
4495
4496 compute_all_dependences (*datarefs, dependence_relations, NULL,
4497 compute_self_and_read_read_dependences);
4498 return true;
4499}
4500
2146e26d 4501/* Entry point (for testing only). Analyze all the data references
355572cc 4502 and the dependence relations in LOOP.
2146e26d 4503
48e1416a 4504 The data references are computed first.
4505
2146e26d 4506 A relation on these nodes is represented by a complete graph. Some
4507 of the relations could be of no interest, thus the relations can be
4508 computed on demand.
48e1416a 4509
2146e26d 4510 In the following function we compute all the relations. This is
4511 just a first implementation that is here for:
48e1416a 4512 - for showing how to ask for the dependence relations,
2146e26d 4513 - for the debugging the whole dependence graph,
4514 - for the dejagnu testcases and maintenance.
48e1416a 4515
2146e26d 4516 It is possible to ask only for a part of the graph, avoiding to
4517 compute the whole dependence graph. The computed dependences are
4518 stored in a knowledge base (KB) such that later queries don't
4519 recompute the same information. The implementation of this KB is
4520 transparent to the optimizer, and thus the KB can be changed with a
4521 more efficient implementation, or the KB could be disabled. */
48e1416a 4522static void
355572cc 4523analyze_all_data_dependences (struct loop *loop)
2146e26d 4524{
4525 unsigned int i;
2146e26d 4526 int nb_data_refs = 10;
48e1416a 4527 VEC (data_reference_p, heap) *datarefs =
41c7a324 4528 VEC_alloc (data_reference_p, heap, nb_data_refs);
48e1416a 4529 VEC (ddr_p, heap) *dependence_relations =
41c7a324 4530 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
a8af2e86 4531 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
2146e26d 4532
4533 /* Compute DDs on the whole function. */
a8af2e86 4534 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
355572cc 4535 &dependence_relations);
2146e26d 4536
4537 if (dump_file)
4538 {
4539 dump_data_dependence_relations (dump_file, dependence_relations);
4540 fprintf (dump_file, "\n\n");
2146e26d 4541
bc3c8ad4 4542 if (dump_flags & TDF_DETAILS)
4543 dump_dist_dir_vectors (dump_file, dependence_relations);
2146e26d 4544
bc3c8ad4 4545 if (dump_flags & TDF_STATS)
2146e26d 4546 {
bc3c8ad4 4547 unsigned nb_top_relations = 0;
4548 unsigned nb_bot_relations = 0;
bc3c8ad4 4549 unsigned nb_chrec_relations = 0;
41c7a324 4550 struct data_dependence_relation *ddr;
bc3c8ad4 4551
48148244 4552 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
bc3c8ad4 4553 {
bc3c8ad4 4554 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4555 nb_top_relations++;
48e1416a 4556
bc3c8ad4 4557 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
dd277d48 4558 nb_bot_relations++;
48e1416a 4559
4560 else
bc3c8ad4 4561 nb_chrec_relations++;
4562 }
48e1416a 4563
bc3c8ad4 4564 gather_stats_on_scev_database ();
4565 }
2146e26d 4566 }
6b6f234c 4567
a8af2e86 4568 VEC_free (loop_p, heap, loop_nest);
6b6f234c 4569 free_dependence_relations (dependence_relations);
4570 free_data_refs (datarefs);
4571}
355572cc 4572
4573/* Computes all the data dependences and check that the results of
4574 several analyzers are the same. */
4575
4576void
4577tree_check_data_deps (void)
4578{
4579 loop_iterator li;
4580 struct loop *loop_nest;
4581
4582 FOR_EACH_LOOP (li, loop_nest, 0)
4583 analyze_all_data_dependences (loop_nest);
4584}
6b6f234c 4585
4586/* Free the memory used by a data dependence relation DDR. */
4587
4588void
4589free_dependence_relation (struct data_dependence_relation *ddr)
4590{
4591 if (ddr == NULL)
4592 return;
4593
d534eb36 4594 if (DDR_SUBSCRIPTS (ddr))
87da4f2e 4595 free_subscripts (DDR_SUBSCRIPTS (ddr));
d534eb36 4596 if (DDR_DIST_VECTS (ddr))
4597 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4598 if (DDR_DIR_VECTS (ddr))
4599 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
41c7a324 4600
6b6f234c 4601 free (ddr);
4602}
4603
4604/* Free the memory used by the data dependence relations from
4605 DEPENDENCE_RELATIONS. */
4606
48e1416a 4607void
41c7a324 4608free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
6b6f234c 4609{
4610 unsigned int i;
41c7a324 4611 struct data_dependence_relation *ddr;
6b6f234c 4612
48148244 4613 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
a8af2e86 4614 if (ddr)
62761183 4615 free_dependence_relation (ddr);
41c7a324 4616
4617 VEC_free (ddr_p, heap, dependence_relations);
2146e26d 4618}
4619
6b6f234c 4620/* Free the memory used by the data references from DATAREFS. */
4621
4622void
41c7a324 4623free_data_refs (VEC (data_reference_p, heap) *datarefs)
6b6f234c 4624{
4625 unsigned int i;
41c7a324 4626 struct data_reference *dr;
2146e26d 4627
48148244 4628 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
31e389a2 4629 free_data_ref (dr);
41c7a324 4630 VEC_free (data_reference_p, heap, datarefs);
6b6f234c 4631}
bc3c8ad4 4632
74c8f69a 4633\f
4634
801c5610 4635/* Dump vertex I in RDG to FILE. */
74c8f69a 4636
801c5610 4637void
4638dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4639{
4640 struct vertex *v = &(rdg->vertices[i]);
4641 struct graph_edge *e;
4642
48e1416a 4643 fprintf (file, "(vertex %d: (%s%s) (in:", i,
801c5610 4644 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4645 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4646
4647 if (v->pred)
4648 for (e = v->pred; e; e = e->pred_next)
4649 fprintf (file, " %d", e->src);
4650
4651 fprintf (file, ") (out:");
4652
4653 if (v->succ)
4654 for (e = v->succ; e; e = e->succ_next)
4655 fprintf (file, " %d", e->dest);
4656
a136cad6 4657 fprintf (file, ")\n");
75a70cf9 4658 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
801c5610 4659 fprintf (file, ")\n");
4660}
4661
4662/* Call dump_rdg_vertex on stderr. */
4663
4b987fac 4664DEBUG_FUNCTION void
801c5610 4665debug_rdg_vertex (struct graph *rdg, int i)
4666{
4667 dump_rdg_vertex (stderr, rdg, i);
4668}
4669
4670/* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4671 dumped vertices to that bitmap. */
4672
4673void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4674{
4675 int i;
4676
4677 fprintf (file, "(%d\n", c);
4678
4679 for (i = 0; i < rdg->n_vertices; i++)
4680 if (rdg->vertices[i].component == c)
4681 {
4682 if (dumped)
4683 bitmap_set_bit (dumped, i);
4684
4685 dump_rdg_vertex (file, rdg, i);
4686 }
4687
4688 fprintf (file, ")\n");
4689}
4690
4691/* Call dump_rdg_vertex on stderr. */
4692
4b987fac 4693DEBUG_FUNCTION void
801c5610 4694debug_rdg_component (struct graph *rdg, int c)
4695{
4696 dump_rdg_component (stderr, rdg, c, NULL);
4697}
4698
4699/* Dump the reduced dependence graph RDG to FILE. */
4700
4701void
4702dump_rdg (FILE *file, struct graph *rdg)
74c8f69a 4703{
4704 int i;
801c5610 4705 bitmap dumped = BITMAP_ALLOC (NULL);
4706
4707 fprintf (file, "(rdg\n");
74c8f69a 4708
4709 for (i = 0; i < rdg->n_vertices; i++)
801c5610 4710 if (!bitmap_bit_p (dumped, i))
4711 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
74c8f69a 4712
801c5610 4713 fprintf (file, ")\n");
4714 BITMAP_FREE (dumped);
74c8f69a 4715}
4716
801c5610 4717/* Call dump_rdg on stderr. */
4718
4b987fac 4719DEBUG_FUNCTION void
801c5610 4720debug_rdg (struct graph *rdg)
4721{
4722 dump_rdg (stderr, rdg);
4723}
74c8f69a 4724
2df6dddb 4725static void
4726dot_rdg_1 (FILE *file, struct graph *rdg)
4727{
4728 int i;
4729
4730 fprintf (file, "digraph RDG {\n");
4731
4732 for (i = 0; i < rdg->n_vertices; i++)
4733 {
4734 struct vertex *v = &(rdg->vertices[i]);
4735 struct graph_edge *e;
4736
4737 /* Highlight reads from memory. */
4738 if (RDG_MEM_READS_STMT (rdg, i))
4739 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4740
4741 /* Highlight stores to memory. */
4742 if (RDG_MEM_WRITE_STMT (rdg, i))
4743 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4744
4745 if (v->succ)
4746 for (e = v->succ; e; e = e->succ_next)
4747 switch (RDGE_TYPE (e))
4748 {
4749 case input_dd:
4750 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4751 break;
4752
4753 case output_dd:
4754 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4755 break;
4756
4757 case flow_dd:
4758 /* These are the most common dependences: don't print these. */
4759 fprintf (file, "%d -> %d \n", i, e->dest);
4760 break;
4761
4762 case anti_dd:
4763 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4764 break;
4765
4766 default:
4767 gcc_unreachable ();
4768 }
4769 }
4770
4771 fprintf (file, "}\n\n");
4772}
4773
4774/* Display the Reduced Dependence Graph using dotty. */
4775extern void dot_rdg (struct graph *);
4776
4777DEBUG_FUNCTION void
4778dot_rdg (struct graph *rdg)
4779{
4780 /* When debugging, enable the following code. This cannot be used
4781 in production compilers because it calls "system". */
4782#if 0
4783 FILE *file = fopen ("/tmp/rdg.dot", "w");
4784 gcc_assert (file != NULL);
4785
4786 dot_rdg_1 (file, rdg);
4787 fclose (file);
4788
4789 system ("dotty /tmp/rdg.dot &");
4790#else
4791 dot_rdg_1 (stderr, rdg);
4792#endif
4793}
4794
801c5610 4795/* This structure is used for recording the mapping statement index in
4796 the RDG. */
4797
fb1e4f4a 4798struct GTY(()) rdg_vertex_info
801c5610 4799{
75a70cf9 4800 gimple stmt;
801c5610 4801 int index;
4802};
4803
4804/* Returns the index of STMT in RDG. */
4805
4806int
75a70cf9 4807rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
801c5610 4808{
4809 struct rdg_vertex_info rvi, *slot;
4810
4811 rvi.stmt = stmt;
4812 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4813
4814 if (!slot)
4815 return -1;
4816
4817 return slot->index;
4818}
4819
4820/* Creates an edge in RDG for each distance vector from DDR. The
4821 order that we keep track of in the RDG is the order in which
4822 statements have to be executed. */
4823
4824static void
4825create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4826{
4827 struct graph_edge *e;
4828 int va, vb;
4829 data_reference_p dra = DDR_A (ddr);
4830 data_reference_p drb = DDR_B (ddr);
4831 unsigned level = ddr_dependence_level (ddr);
4832
4833 /* For non scalar dependences, when the dependence is REVERSED,
4834 statement B has to be executed before statement A. */
4835 if (level > 0
4836 && !DDR_REVERSED_P (ddr))
74c8f69a 4837 {
801c5610 4838 data_reference_p tmp = dra;
4839 dra = drb;
4840 drb = tmp;
74c8f69a 4841 }
4842
801c5610 4843 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4844 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4845
4846 if (va < 0 || vb < 0)
4847 return;
74c8f69a 4848
4849 e = add_edge (rdg, va, vb);
4850 e->data = XNEW (struct rdg_edge);
4851
801c5610 4852 RDGE_LEVEL (e) = level;
255b6be7 4853 RDGE_RELATION (e) = ddr;
801c5610 4854
74c8f69a 4855 /* Determines the type of the data dependence. */
4856 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4857 RDGE_TYPE (e) = input_dd;
9ff25603 4858 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
74c8f69a 4859 RDGE_TYPE (e) = output_dd;
9ff25603 4860 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
74c8f69a 4861 RDGE_TYPE (e) = flow_dd;
9ff25603 4862 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
74c8f69a 4863 RDGE_TYPE (e) = anti_dd;
4864}
4865
4866/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4867 the index of DEF in RDG. */
4868
4869static void
4870create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4871{
4872 use_operand_p imm_use_p;
4873 imm_use_iterator iterator;
48e1416a 4874
74c8f69a 4875 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4876 {
801c5610 4877 struct graph_edge *e;
4878 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
74c8f69a 4879
801c5610 4880 if (use < 0)
4881 continue;
4882
4883 e = add_edge (rdg, idef, use);
74c8f69a 4884 e->data = XNEW (struct rdg_edge);
4885 RDGE_TYPE (e) = flow_dd;
255b6be7 4886 RDGE_RELATION (e) = NULL;
74c8f69a 4887 }
4888}
4889
4890/* Creates the edges of the reduced dependence graph RDG. */
4891
4892static void
4893create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4894{
4895 int i;
4896 struct data_dependence_relation *ddr;
4897 def_operand_p def_p;
4898 ssa_op_iter iter;
4899
48148244 4900 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
74c8f69a 4901 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4902 create_rdg_edge_for_ddr (rdg, ddr);
4903
4904 for (i = 0; i < rdg->n_vertices; i++)
801c5610 4905 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4906 iter, SSA_OP_DEF)
74c8f69a 4907 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4908}
4909
4910/* Build the vertices of the reduced dependence graph RDG. */
4911
255b6be7 4912void
75a70cf9 4913create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
74c8f69a 4914{
801c5610 4915 int i, j;
75a70cf9 4916 gimple stmt;
74c8f69a 4917
48148244 4918 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
74c8f69a 4919 {
801c5610 4920 VEC (data_ref_loc, heap) *references;
4921 data_ref_loc *ref;
74c8f69a 4922 struct vertex *v = &(rdg->vertices[i]);
801c5610 4923 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4924 struct rdg_vertex_info **slot;
4925
4926 rvi->stmt = stmt;
4927 rvi->index = i;
4928 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4929
4930 if (!*slot)
4931 *slot = rvi;
4932 else
4933 free (rvi);
74c8f69a 4934
4935 v->data = XNEW (struct rdg_vertex);
801c5610 4936 RDG_STMT (rdg, i) = stmt;
4937
4938 RDG_MEM_WRITE_STMT (rdg, i) = false;
4939 RDG_MEM_READS_STMT (rdg, i) = false;
75a70cf9 4940 if (gimple_code (stmt) == GIMPLE_PHI)
801c5610 4941 continue;
4942
4943 get_references_in_stmt (stmt, &references);
48148244 4944 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
801c5610 4945 if (!ref->is_read)
4946 RDG_MEM_WRITE_STMT (rdg, i) = true;
4947 else
4948 RDG_MEM_READS_STMT (rdg, i) = true;
4949
4950 VEC_free (data_ref_loc, heap, references);
74c8f69a 4951 }
4952}
4953
801c5610 4954/* Initialize STMTS with all the statements of LOOP. When
4955 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4956 which we discover statements is important as
4957 generate_loops_for_partition is using the same traversal for
4958 identifying statements. */
74c8f69a 4959
4960static void
75a70cf9 4961stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
74c8f69a 4962{
4963 unsigned int i;
4964 basic_block *bbs = get_loop_body_in_dom_order (loop);
4965
4966 for (i = 0; i < loop->num_nodes; i++)
4967 {
74c8f69a 4968 basic_block bb = bbs[i];
75a70cf9 4969 gimple_stmt_iterator bsi;
4970 gimple stmt;
74c8f69a 4971
75a70cf9 4972 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4973 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
74c8f69a 4974
75a70cf9 4975 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4976 {
4977 stmt = gsi_stmt (bsi);
4978 if (gimple_code (stmt) != GIMPLE_LABEL)
4979 VEC_safe_push (gimple, heap, *stmts, stmt);
4980 }
74c8f69a 4981 }
4982
4983 free (bbs);
4984}
4985
4986/* Returns true when all the dependences are computable. */
4987
4988static bool
4989known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4990{
4991 ddr_p ddr;
4992 unsigned int i;
4993
48148244 4994 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
74c8f69a 4995 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4996 return false;
48e1416a 4997
74c8f69a 4998 return true;
4999}
5000
801c5610 5001/* Computes a hash function for element ELT. */
5002
5003static hashval_t
5004hash_stmt_vertex_info (const void *elt)
5005{
f7f3687c 5006 const struct rdg_vertex_info *const rvi =
5007 (const struct rdg_vertex_info *) elt;
75a70cf9 5008 gimple stmt = rvi->stmt;
801c5610 5009
5010 return htab_hash_pointer (stmt);
5011}
5012
5013/* Compares database elements E1 and E2. */
5014
5015static int
5016eq_stmt_vertex_info (const void *e1, const void *e2)
5017{
5018 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5019 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5020
5021 return elt1->stmt == elt2->stmt;
5022}
5023
5024/* Free the element E. */
5025
5026static void
5027hash_stmt_vertex_del (void *e)
5028{
5029 free (e);
5030}
5031
255b6be7 5032/* Build the Reduced Dependence Graph (RDG) with one vertex per
5033 statement of the loop nest, and one edge per data dependence or
5034 scalar dependence. */
5035
5036struct graph *
5037build_empty_rdg (int n_stmts)
5038{
5039 int nb_data_refs = 10;
5040 struct graph *rdg = new_graph (n_stmts);
5041
5042 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5043 eq_stmt_vertex_info, hash_stmt_vertex_del);
5044 return rdg;
5045}
5046
801c5610 5047/* Build the Reduced Dependence Graph (RDG) with one vertex per
5048 statement of the loop nest, and one edge per data dependence or
5049 scalar dependence. */
74c8f69a 5050
5051struct graph *
a8af2e86 5052build_rdg (struct loop *loop,
5053 VEC (loop_p, heap) **loop_nest,
5054 VEC (ddr_p, heap) **dependence_relations,
5055 VEC (data_reference_p, heap) **datarefs)
74c8f69a 5056{
74c8f69a 5057 struct graph *rdg = NULL;
a8af2e86 5058 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
74c8f69a 5059
a8af2e86 5060 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5061 dependence_relations);
801c5610 5062
a8af2e86 5063 if (known_dependences_p (*dependence_relations))
5064 {
5065 stmts_from_loop (loop, &stmts);
5066 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5067 create_rdg_vertices (rdg, stmts);
5068 create_rdg_edges (rdg, *dependence_relations);
5069 }
74c8f69a 5070
75a70cf9 5071 VEC_free (gimple, heap, stmts);
74c8f69a 5072 return rdg;
5073}
801c5610 5074
5075/* Free the reduced dependence graph RDG. */
5076
5077void
5078free_rdg (struct graph *rdg)
5079{
5080 int i;
5081
5082 for (i = 0; i < rdg->n_vertices; i++)
a8af2e86 5083 {
5084 struct vertex *v = &(rdg->vertices[i]);
5085 struct graph_edge *e;
5086
5087 for (e = v->succ; e; e = e->succ_next)
5088 if (e->data)
5089 free (e->data);
5090
5091 if (v->data)
5092 free (v->data);
5093 }
801c5610 5094
5095 htab_delete (rdg->indices);
5096 free_graph (rdg);
5097}
5098
5099/* Initialize STMTS with all the statements of LOOP that contain a
5100 store to memory. */
5101
5102void
75a70cf9 5103stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
801c5610 5104{
5105 unsigned int i;
5106 basic_block *bbs = get_loop_body_in_dom_order (loop);
5107
5108 for (i = 0; i < loop->num_nodes; i++)
5109 {
5110 basic_block bb = bbs[i];
75a70cf9 5111 gimple_stmt_iterator bsi;
801c5610 5112
75a70cf9 5113 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
dd277d48 5114 if (gimple_vdef (gsi_stmt (bsi)))
75a70cf9 5115 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
801c5610 5116 }
5117
5118 free (bbs);
5119}
5120
a136cad6 5121/* Returns true when the statement at STMT is of the form "A[i] = 0"
5122 that contains a data reference on its LHS with a stride of the same
5123 size as its unit type. */
1c4f9959 5124
a136cad6 5125bool
5126stmt_with_adjacent_zero_store_dr_p (gimple stmt)
1c4f9959 5127{
a136cad6 5128 tree op0, op1;
1c4f9959 5129 bool res;
a136cad6 5130 struct data_reference *dr;
5131
5132 if (!stmt
5133 || !gimple_vdef (stmt)
5134 || !is_gimple_assign (stmt)
5135 || !gimple_assign_single_p (stmt)
5136 || !(op1 = gimple_assign_rhs1 (stmt))
5137 || !(integer_zerop (op1) || real_zerop (op1)))
5138 return false;
5139
5140 dr = XCNEW (struct data_reference);
5141 op0 = gimple_assign_lhs (stmt);
1c4f9959 5142
5143 DR_STMT (dr) = stmt;
5144 DR_REF (dr) = op0;
5145
5146 res = dr_analyze_innermost (dr)
5147 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5148
5149 free_data_ref (dr);
5150 return res;
5151}
5152
0acf3477 5153/* Initialize STMTS with all the statements of LOOP that contain a
5154 store to memory of the form "A[i] = 0". */
5155
5156void
5157stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5158{
5159 unsigned int i;
5160 basic_block bb;
5161 gimple_stmt_iterator si;
5162 gimple stmt;
0acf3477 5163 basic_block *bbs = get_loop_body_in_dom_order (loop);
5164
5165 for (i = 0; i < loop->num_nodes; i++)
5166 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5167 if ((stmt = gsi_stmt (si))
a136cad6 5168 && stmt_with_adjacent_zero_store_dr_p (stmt))
0acf3477 5169 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5170
5171 free (bbs);
5172}
5173
801c5610 5174/* For a data reference REF, return the declaration of its base
5175 address or NULL_TREE if the base is not determined. */
5176
5177static inline tree
75a70cf9 5178ref_base_address (gimple stmt, data_ref_loc *ref)
801c5610 5179{
5180 tree base = NULL_TREE;
5181 tree base_address;
5182 struct data_reference *dr = XCNEW (struct data_reference);
5183
5184 DR_STMT (dr) = stmt;
5185 DR_REF (dr) = *ref->pos;
5186 dr_analyze_innermost (dr);
5187 base_address = DR_BASE_ADDRESS (dr);
5188
5189 if (!base_address)
5190 goto end;
5191
5192 switch (TREE_CODE (base_address))
5193 {
5194 case ADDR_EXPR:
5195 base = TREE_OPERAND (base_address, 0);
5196 break;
5197
5198 default:
5199 base = base_address;
5200 break;
5201 }
5202
5203 end:
5204 free_data_ref (dr);
5205 return base;
5206}
5207
5208/* Determines whether the statement from vertex V of the RDG has a
5209 definition used outside the loop that contains this statement. */
5210
5211bool
5212rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5213{
75a70cf9 5214 gimple stmt = RDG_STMT (rdg, v);
801c5610 5215 struct loop *loop = loop_containing_stmt (stmt);
5216 use_operand_p imm_use_p;
5217 imm_use_iterator iterator;
5218 ssa_op_iter it;
5219 def_operand_p def_p;
5220
5221 if (!loop)
5222 return true;
5223
5224 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5225 {
5226 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5227 {
5228 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5229 return true;
5230 }
5231 }
5232
5233 return false;
5234}
5235
5236/* Determines whether statements S1 and S2 access to similar memory
5237 locations. Two memory accesses are considered similar when they
5238 have the same base address declaration, i.e. when their
5239 ref_base_address is the same. */
5240
5241bool
75a70cf9 5242have_similar_memory_accesses (gimple s1, gimple s2)
801c5610 5243{
5244 bool res = false;
5245 unsigned i, j;
5246 VEC (data_ref_loc, heap) *refs1, *refs2;
5247 data_ref_loc *ref1, *ref2;
5248
5249 get_references_in_stmt (s1, &refs1);
5250 get_references_in_stmt (s2, &refs2);
5251
48148244 5252 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
801c5610 5253 {
5254 tree base1 = ref_base_address (s1, ref1);
5255
5256 if (base1)
48148244 5257 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
801c5610 5258 if (base1 == ref_base_address (s2, ref2))
5259 {
5260 res = true;
5261 goto end;
5262 }
5263 }
5264
5265 end:
5266 VEC_free (data_ref_loc, heap, refs1);
5267 VEC_free (data_ref_loc, heap, refs2);
5268 return res;
5269}
5270
5271/* Helper function for the hashtab. */
5272
5273static int
5274have_similar_memory_accesses_1 (const void *s1, const void *s2)
5275{
75a70cf9 5276 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5277 CONST_CAST_GIMPLE ((const_gimple) s2));
801c5610 5278}
5279
5280/* Helper function for the hashtab. */
5281
5282static hashval_t
5283ref_base_address_1 (const void *s)
5284{
75a70cf9 5285 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
801c5610 5286 unsigned i;
5287 VEC (data_ref_loc, heap) *refs;
5288 data_ref_loc *ref;
5289 hashval_t res = 0;
5290
5291 get_references_in_stmt (stmt, &refs);
5292
48148244 5293 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
801c5610 5294 if (!ref->is_read)
5295 {
5296 res = htab_hash_pointer (ref_base_address (stmt, ref));
5297 break;
5298 }
5299
5300 VEC_free (data_ref_loc, heap, refs);
5301 return res;
5302}
5303
5304/* Try to remove duplicated write data references from STMTS. */
5305
5306void
75a70cf9 5307remove_similar_memory_refs (VEC (gimple, heap) **stmts)
801c5610 5308{
5309 unsigned i;
75a70cf9 5310 gimple stmt;
5311 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
801c5610 5312 have_similar_memory_accesses_1, NULL);
5313
75a70cf9 5314 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
801c5610 5315 {
5316 void **slot;
5317
5318 slot = htab_find_slot (seen, stmt, INSERT);
5319
5320 if (*slot)
75a70cf9 5321 VEC_ordered_remove (gimple, *stmts, i);
801c5610 5322 else
5323 {
5324 *slot = (void *) stmt;
5325 i++;
5326 }
5327 }
5328
5329 htab_delete (seen);
5330}
5331
b79b3386 5332/* Returns the index of PARAMETER in the parameters vector of the
5333 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5334
48e1416a 5335int
5336access_matrix_get_index_for_parameter (tree parameter,
b79b3386 5337 struct access_matrix *access_matrix)
5338{
5339 int i;
5340 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5341 tree lambda_parameter;
5342
48148244 5343 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
b79b3386 5344 if (lambda_parameter == parameter)
5345 return i + AM_NB_INDUCTION_VARS (access_matrix);
5346
5347 return -1;
5348}