]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-data-ref.c
Add RTM support to -march=native
[thirdparty/gcc.git] / gcc / tree-data-ref.c
CommitLineData
56cf8686 1/* Data references and dependences detectors.
06e1d03d 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
66647d44 3 Free Software Foundation, Inc.
0ff4040e 4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
56cf8686
SP
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
56cf8686
SP
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
56cf8686
SP
21
22/* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
b8698a0f
L
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
56cf8686
SP
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
b8698a0f 31
56cf8686 32 The goals of this analysis are:
b8698a0f 33
56cf8686
SP
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
b8698a0f 37
56cf8686
SP
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
b8698a0f 40
56cf8686
SP
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
b8698a0f
L
46
47 - to define a knowledge base for storing the data dependence
56cf8686 48 information,
b8698a0f 49
56cf8686 50 - to define an interface to access this data.
b8698a0f
L
51
52
56cf8686 53 Definitions:
b8698a0f 54
56cf8686
SP
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
b8698a0f 61 solutions are integer constants, for example the equation
56cf8686
SP
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
b8698a0f 64
56cf8686 65 References:
b8698a0f 66
56cf8686
SP
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
b8698a0f
L
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
56cf8686
SP
72 by Utpal Banerjee.
73
b8698a0f 74
56cf8686
SP
75*/
76
77#include "config.h"
78#include "system.h"
79#include "coretypes.h"
cf835838 80#include "gimple-pretty-print.h"
56cf8686 81#include "tree-flow.h"
56cf8686 82#include "cfgloop.h"
56cf8686
SP
83#include "tree-data-ref.h"
84#include "tree-scalar-evolution.h"
85#include "tree-pass.h"
946e1bc7 86#include "langhooks.h"
02f5d6c5 87#include "tree-affine.h"
3881dee9 88#include "params.h"
56cf8686 89
0ff4040e
SP
90static struct datadep_stats
91{
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
96
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
100
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
105
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
110
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115} dependence_stats;
116
ba42e045
SP
117static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
da9a21f4
SP
119 struct data_reference *,
120 struct loop *);
56cf8686
SP
121/* Returns true iff A divides B. */
122
b8698a0f 123static inline bool
ed7a4b4b 124tree_fold_divides_p (const_tree a, const_tree b)
56cf8686 125{
b73a6056
RS
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
d35936ab 128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
56cf8686
SP
129}
130
86df10e3
SP
131/* Returns true iff A divides B. */
132
b8698a0f 133static inline bool
86df10e3
SP
134int_divides_p (int a, int b)
135{
136 return ((b % a) == 0);
56cf8686
SP
137}
138
139\f
140
b8698a0f 141/* Dump into FILE all the data references from DATAREFS. */
56cf8686 142
aeb83f09 143static void
ebf78a47 144dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
56cf8686
SP
145{
146 unsigned int i;
ebf78a47
SP
147 struct data_reference *dr;
148
ac47786e 149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
ebf78a47 150 dump_data_reference (file, dr);
56cf8686
SP
151}
152
b8698a0f 153/* Dump into STDERR all the data references from DATAREFS. */
a37d995a 154
24e47c76 155DEBUG_FUNCTION void
a37d995a
SP
156debug_data_references (VEC (data_reference_p, heap) *datarefs)
157{
158 dump_data_references (stderr, datarefs);
159}
160
a37d995a
SP
161/* Print to STDERR the data_reference DR. */
162
24e47c76 163DEBUG_FUNCTION void
a37d995a
SP
164debug_data_reference (struct data_reference *dr)
165{
166 dump_data_reference (stderr, dr);
167}
168
56cf8686
SP
169/* Dump function for a DATA_REFERENCE structure. */
170
b8698a0f
L
171void
172dump_data_reference (FILE *outf,
56cf8686
SP
173 struct data_reference *dr)
174{
175 unsigned int i;
b8698a0f 176
28c5db57
SP
177 fprintf (outf, "#(Data Ref: \n");
178 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
179 fprintf (outf, "# stmt: ");
726a989a 180 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
03922af3 181 fprintf (outf, "# ref: ");
56cf8686 182 print_generic_stmt (outf, DR_REF (dr), 0);
03922af3 183 fprintf (outf, "# base_object: ");
86a07404 184 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
b8698a0f 185
56cf8686
SP
186 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
187 {
03922af3 188 fprintf (outf, "# Access function %d: ", i);
56cf8686
SP
189 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
190 }
03922af3 191 fprintf (outf, "#)\n");
56cf8686
SP
192}
193
d93817c4
ZD
194/* Dumps the affine function described by FN to the file OUTF. */
195
196static void
197dump_affine_function (FILE *outf, affine_fn fn)
198{
199 unsigned i;
200 tree coef;
201
202 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
203 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
204 {
205 fprintf (outf, " + ");
206 print_generic_expr (outf, coef, TDF_SLIM);
207 fprintf (outf, " * x_%u", i);
208 }
209}
210
211/* Dumps the conflict function CF to the file OUTF. */
212
213static void
214dump_conflict_function (FILE *outf, conflict_function *cf)
215{
216 unsigned i;
217
218 if (cf->n == NO_DEPENDENCE)
219 fprintf (outf, "no dependence\n");
220 else if (cf->n == NOT_KNOWN)
221 fprintf (outf, "not known\n");
222 else
223 {
224 for (i = 0; i < cf->n; i++)
225 {
226 fprintf (outf, "[");
227 dump_affine_function (outf, cf->fns[i]);
228 fprintf (outf, "]\n");
229 }
230 }
231}
232
86df10e3
SP
233/* Dump function for a SUBSCRIPT structure. */
234
aeb83f09 235static void
86df10e3
SP
236dump_subscript (FILE *outf, struct subscript *subscript)
237{
d93817c4 238 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
86df10e3
SP
239
240 fprintf (outf, "\n (subscript \n");
241 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
d93817c4
ZD
242 dump_conflict_function (outf, cf);
243 if (CF_NONTRIVIAL_P (cf))
86df10e3
SP
244 {
245 tree last_iteration = SUB_LAST_CONFLICT (subscript);
246 fprintf (outf, " last_conflict: ");
247 print_generic_stmt (outf, last_iteration, 0);
248 }
b8698a0f 249
d93817c4 250 cf = SUB_CONFLICTS_IN_B (subscript);
86df10e3 251 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
d93817c4
ZD
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
86df10e3
SP
254 {
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
258 }
259
260 fprintf (outf, " (Subscript distance: ");
261 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
262 fprintf (outf, " )\n");
263 fprintf (outf, " )\n");
264}
265
0ff4040e
SP
266/* Print the classic direction vector DIRV to OUTF. */
267
aeb83f09 268static void
0ff4040e
SP
269print_direction_vector (FILE *outf,
270 lambda_vector dirv,
271 int length)
272{
273 int eq;
274
275 for (eq = 0; eq < length; eq++)
276 {
81f40b79
ILT
277 enum data_dependence_direction dir = ((enum data_dependence_direction)
278 dirv[eq]);
0ff4040e
SP
279
280 switch (dir)
281 {
282 case dir_positive:
283 fprintf (outf, " +");
284 break;
285 case dir_negative:
286 fprintf (outf, " -");
287 break;
288 case dir_equal:
289 fprintf (outf, " =");
290 break;
291 case dir_positive_or_equal:
292 fprintf (outf, " +=");
293 break;
294 case dir_positive_or_negative:
295 fprintf (outf, " +-");
296 break;
297 case dir_negative_or_equal:
298 fprintf (outf, " -=");
299 break;
300 case dir_star:
301 fprintf (outf, " *");
302 break;
303 default:
304 fprintf (outf, "indep");
305 break;
306 }
307 }
308 fprintf (outf, "\n");
309}
310
ba42e045
SP
311/* Print a vector of direction vectors. */
312
aeb83f09 313static void
ba42e045
SP
314print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
315 int length)
316{
317 unsigned j;
318 lambda_vector v;
319
ac47786e 320 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
ba42e045
SP
321 print_direction_vector (outf, v, length);
322}
323
b305e3da
SP
324/* Print out a vector VEC of length N to OUTFILE. */
325
326static inline void
327print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328{
329 int i;
330
331 for (i = 0; i < n; i++)
332 fprintf (outfile, "%3d ", vector[i]);
333 fprintf (outfile, "\n");
334}
335
ba42e045
SP
336/* Print a vector of distance vectors. */
337
aeb83f09
RG
338static void
339print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
340 int length)
ba42e045
SP
341{
342 unsigned j;
343 lambda_vector v;
344
ac47786e 345 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
ba42e045
SP
346 print_lambda_vector (outf, v, length);
347}
348
56cf8686
SP
349/* Dump function for a DATA_DEPENDENCE_RELATION structure. */
350
aeb83f09 351static void
b8698a0f 352dump_data_dependence_relation (FILE *outf,
56cf8686
SP
353 struct data_dependence_relation *ddr)
354{
56cf8686 355 struct data_reference *dra, *drb;
86df10e3 356
86df10e3 357 fprintf (outf, "(Data Dep: \n");
dea61d92 358
ed2024ba
MJ
359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 {
b61b1f17
MM
361 if (ddr)
362 {
363 dra = DDR_A (ddr);
364 drb = DDR_B (ddr);
365 if (dra)
366 dump_data_reference (outf, dra);
367 else
368 fprintf (outf, " (nil)\n");
369 if (drb)
370 dump_data_reference (outf, drb);
371 else
372 fprintf (outf, " (nil)\n");
373 }
ed2024ba
MJ
374 fprintf (outf, " (don't know)\n)\n");
375 return;
376 }
377
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
dea61d92
SP
380 dump_data_reference (outf, dra);
381 dump_data_reference (outf, drb);
382
ed2024ba 383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
56cf8686 384 fprintf (outf, " (no dependence)\n");
b8698a0f 385
86df10e3 386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
56cf8686 387 {
86df10e3 388 unsigned int i;
ba42e045 389 struct loop *loopi;
304afda6 390
56cf8686
SP
391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 {
56cf8686
SP
393 fprintf (outf, " access_fn_A: ");
394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 fprintf (outf, " access_fn_B: ");
396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
86df10e3 397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
56cf8686 398 }
304afda6 399
3d8864c0 400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
ba42e045 401 fprintf (outf, " loop nest: (");
ac47786e 402 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
ba42e045
SP
403 fprintf (outf, "%d ", loopi->num);
404 fprintf (outf, ")\n");
405
304afda6 406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
56cf8686 407 {
304afda6
SP
408 fprintf (outf, " distance_vector: ");
409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
ba42e045 410 DDR_NB_LOOPS (ddr));
86df10e3 411 }
304afda6
SP
412
413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
86df10e3 414 {
304afda6 415 fprintf (outf, " direction_vector: ");
0ff4040e 416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
ba42e045 417 DDR_NB_LOOPS (ddr));
56cf8686 418 }
56cf8686
SP
419 }
420
421 fprintf (outf, ")\n");
422}
423
aeb83f09 424/* Debug version. */
56cf8686 425
aeb83f09
RG
426DEBUG_FUNCTION void
427debug_data_dependence_relation (struct data_dependence_relation *ddr)
56cf8686 428{
aeb83f09
RG
429 dump_data_dependence_relation (stderr, ddr);
430}
b8698a0f 431
aeb83f09 432/* Dump into FILE all the dependence relations from DDRS. */
b8698a0f 433
aeb83f09
RG
434void
435dump_data_dependence_relations (FILE *file,
436 VEC (ddr_p, heap) *ddrs)
437{
438 unsigned int i;
439 struct data_dependence_relation *ddr;
b8698a0f 440
aeb83f09
RG
441 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
442 dump_data_dependence_relation (file, ddr);
443}
b8698a0f 444
aeb83f09 445/* Dump to STDERR all the dependence relations from DDRS. */
b8698a0f 446
aeb83f09
RG
447DEBUG_FUNCTION void
448debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
449{
450 dump_data_dependence_relations (stderr, ddrs);
56cf8686
SP
451}
452
86df10e3
SP
453/* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
456 considered nest. */
457
aeb83f09 458static void
ebf78a47 459dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
86df10e3 460{
304afda6 461 unsigned int i, j;
ebf78a47
SP
462 struct data_dependence_relation *ddr;
463 lambda_vector v;
86df10e3 464
ac47786e 465 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
ebf78a47
SP
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467 {
ac47786e 468 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
ebf78a47
SP
469 {
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
473 }
474
ac47786e 475 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
ebf78a47
SP
476 {
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
480 }
481 }
304afda6 482
86df10e3
SP
483 fprintf (file, "\n\n");
484}
485
486/* Dumps the data dependence relations DDRS in FILE. */
487
aeb83f09 488static void
ebf78a47 489dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
86df10e3
SP
490{
491 unsigned int i;
ebf78a47
SP
492 struct data_dependence_relation *ddr;
493
ac47786e 494 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
ebf78a47 495 dump_data_dependence_relation (file, ddr);
86df10e3 496
86df10e3
SP
497 fprintf (file, "\n\n");
498}
499
aeb83f09
RG
500DEBUG_FUNCTION void
501debug_ddrs (VEC (ddr_p, heap) *ddrs)
502{
503 dump_ddrs (stderr, ddrs);
504}
505
726a989a
RB
506/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510 is returned. */
86a07404 511
726a989a
RB
512static bool
513split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 tree *var, tree *off)
86a07404 515{
3cb960c7
ZD
516 tree var0, var1;
517 tree off0, off1;
726a989a 518 enum tree_code ocode = code;
86a07404 519
726a989a
RB
520 *var = NULL_TREE;
521 *off = NULL_TREE;
86a07404 522
5be014d5 523 switch (code)
86a07404 524 {
3cb960c7
ZD
525 case INTEGER_CST:
526 *var = build_int_cst (type, 0);
726a989a
RB
527 *off = fold_convert (ssizetype, op0);
528 return true;
86a07404 529
5be014d5 530 case POINTER_PLUS_EXPR:
726a989a 531 ocode = PLUS_EXPR;
5be014d5 532 /* FALLTHROUGH */
3cb960c7
ZD
533 case PLUS_EXPR:
534 case MINUS_EXPR:
726a989a
RB
535 split_constant_offset (op0, &var0, &off0);
536 split_constant_offset (op1, &var1, &off1);
537 *var = fold_build2 (code, type, var0, var1);
538 *off = size_binop (ocode, off0, off1);
539 return true;
86a07404 540
86a07404 541 case MULT_EXPR:
726a989a
RB
542 if (TREE_CODE (op1) != INTEGER_CST)
543 return false;
3cb960c7 544
726a989a
RB
545 split_constant_offset (op0, &var0, &off0);
546 *var = fold_build2 (MULT_EXPR, type, var0, op1);
547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548 return true;
86a07404 549
3cb960c7
ZD
550 case ADDR_EXPR:
551 {
726a989a 552 tree base, poffset;
3cb960c7
ZD
553 HOST_WIDE_INT pbitsize, pbitpos;
554 enum machine_mode pmode;
555 int punsignedp, pvolatilep;
86a07404 556
da4b6efc 557 op0 = TREE_OPERAND (op0, 0);
726a989a 558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
3cb960c7 559 &pmode, &punsignedp, &pvolatilep, false);
86a07404 560
3cb960c7 561 if (pbitpos % BITS_PER_UNIT != 0)
726a989a 562 return false;
3cb960c7
ZD
563 base = build_fold_addr_expr (base);
564 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
86a07404 565
3cb960c7
ZD
566 if (poffset)
567 {
568 split_constant_offset (poffset, &poffset, &off1);
569 off0 = size_binop (PLUS_EXPR, off0, off1);
36ad7922 570 if (POINTER_TYPE_P (TREE_TYPE (base)))
5d49b6a7 571 base = fold_build_pointer_plus (base, poffset);
36ad7922
JJ
572 else
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
3cb960c7
ZD
575 }
576
6481b879
JJ
577 var0 = fold_convert (type, base);
578
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
1a87cf0c 584 if (CONVERT_EXPR_P (var0))
6481b879
JJ
585 {
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
590 } */
591 while (POINTER_TYPE_P (type))
592 type = TREE_TYPE (type);
593 if (int_size_in_bytes (type) < 0)
726a989a 594 return false;
6481b879
JJ
595
596 *var = var0;
3cb960c7 597 *off = off0;
726a989a 598 return true;
3cb960c7 599 }
86a07404 600
06cb4f79
JS
601 case SSA_NAME:
602 {
726a989a
RB
603 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 enum tree_code subcode;
06cb4f79 605
726a989a
RB
606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 return false;
608
609 var0 = gimple_assign_rhs1 (def_stmt);
610 subcode = gimple_assign_rhs_code (def_stmt);
611 var1 = gimple_assign_rhs2 (def_stmt);
612
613 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
06cb4f79 614 }
b61b1f17
MM
615 CASE_CONVERT:
616 {
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype = TREE_TYPE (op0);
622 if ((POINTER_TYPE_P (itype)
623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 {
627 split_constant_offset (op0, &var0, off);
628 *var = fold_convert (type, var0);
629 return true;
630 }
631 return false;
632 }
06cb4f79 633
86a07404 634 default:
726a989a 635 return false;
86a07404 636 }
726a989a
RB
637}
638
639/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
641
642void
643split_constant_offset (tree exp, tree *var, tree *off)
644{
645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646 enum tree_code code;
86a07404 647
726a989a 648 *var = exp;
3cb960c7 649 *off = ssize_int (0);
726a989a
RB
650 STRIP_NOPS (exp);
651
ffd78b30
RG
652 if (tree_is_chrec (exp)
653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
726a989a
RB
654 return;
655
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660 {
661 *var = fold_convert (type, e);
662 *off = o;
663 }
86a07404
IR
664}
665
3cb960c7
ZD
666/* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
86a07404
IR
668
669static tree
3cb960c7 670canonicalize_base_object_address (tree addr)
86a07404 671{
bbc8a8dc
ZD
672 tree orig = addr;
673
3cb960c7 674 STRIP_NOPS (addr);
86a07404 675
bbc8a8dc
ZD
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
680
3cb960c7
ZD
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
86a07404 683
3cb960c7 684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
86a07404
IR
685}
686
b8698a0f 687/* Analyzes the behavior of the memory reference DR in the innermost loop or
4e4452b6 688 basic block that contains it. Returns true if analysis succeed or false
a70d6342 689 otherwise. */
86a07404 690
3661e899 691bool
4e4452b6 692dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
86a07404 693{
726a989a 694 gimple stmt = DR_STMT (dr);
3cb960c7
ZD
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
86a07404 697 HOST_WIDE_INT pbitsize, pbitpos;
3cb960c7 698 tree base, poffset;
86a07404
IR
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
3cb960c7
ZD
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
a70d6342 703 bool in_loop = (loop && loop->num);
3cb960c7
ZD
704
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
86a07404 707
3cb960c7
ZD
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
86a07404 711
3cb960c7 712 if (pbitpos % BITS_PER_UNIT != 0)
86a07404 713 {
3cb960c7
ZD
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
3661e899 716 return false;
3cb960c7 717 }
86a07404 718
70f34814
RG
719 if (TREE_CODE (base) == MEM_REF)
720 {
721 if (!integer_zerop (TREE_OPERAND (base, 1)))
722 {
723 if (!poffset)
724 {
725 double_int moff = mem_ref_offset (base);
726 poffset = double_int_to_tree (sizetype, moff);
727 }
728 else
729 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
730 }
731 base = TREE_OPERAND (base, 0);
732 }
733 else
734 base = build_fold_addr_expr (base);
4e4452b6 735
a70d6342 736 if (in_loop)
3cb960c7 737 {
b8698a0f 738 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
a70d6342
IR
739 false))
740 {
4e4452b6
IR
741 if (nest)
742 {
743 if (dump_file && (dump_flags & TDF_DETAILS))
744 fprintf (dump_file, "failed: evolution of base is not"
745 " affine.\n");
746 return false;
747 }
748 else
749 {
750 base_iv.base = base;
751 base_iv.step = ssize_int (0);
752 base_iv.no_overflow = true;
753 }
a70d6342
IR
754 }
755 }
756 else
757 {
758 base_iv.base = base;
759 base_iv.step = ssize_int (0);
760 base_iv.no_overflow = true;
3cb960c7 761 }
a70d6342 762
24adb18f 763 if (!poffset)
3cb960c7
ZD
764 {
765 offset_iv.base = ssize_int (0);
766 offset_iv.step = ssize_int (0);
767 }
24adb18f 768 else
3cb960c7 769 {
24adb18f
IR
770 if (!in_loop)
771 {
772 offset_iv.base = poffset;
773 offset_iv.step = ssize_int (0);
774 }
775 else if (!simple_iv (loop, loop_containing_stmt (stmt),
776 poffset, &offset_iv, false))
777 {
4e4452b6
IR
778 if (nest)
779 {
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "failed: evolution of offset is not"
782 " affine.\n");
783 return false;
784 }
785 else
786 {
787 offset_iv.base = poffset;
788 offset_iv.step = ssize_int (0);
789 }
24adb18f 790 }
3cb960c7 791 }
86a07404 792
3cb960c7
ZD
793 init = ssize_int (pbitpos / BITS_PER_UNIT);
794 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795 init = size_binop (PLUS_EXPR, init, dinit);
796 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797 init = size_binop (PLUS_EXPR, init, dinit);
86a07404 798
3cb960c7
ZD
799 step = size_binop (PLUS_EXPR,
800 fold_convert (ssizetype, base_iv.step),
801 fold_convert (ssizetype, offset_iv.step));
86a07404 802
3cb960c7 803 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
86a07404 804
3cb960c7
ZD
805 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806 DR_INIT (dr) = init;
807 DR_STEP (dr) = step;
86a07404 808
3cb960c7 809 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
86a07404 810
3cb960c7
ZD
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "success.\n");
3661e899
TB
813
814 return true;
3cb960c7 815}
86a07404 816
3cb960c7 817/* Determines the base object and the list of indices of memory reference
5c640e29 818 DR, analyzed in LOOP and instantiated in loop nest NEST. */
86a07404 819
3cb960c7 820static void
5c640e29 821dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
3cb960c7 822{
3cb960c7 823 VEC (tree, heap) *access_fns = NULL;
c4ddde1b 824 tree ref, op;
9fcb758b
RG
825 tree base, off, access_fn;
826 basic_block before_loop;
b8698a0f 827
9fcb758b
RG
828 /* If analyzing a basic-block there are no indices to analyze
829 and thus no access functions. */
02f5d6c5
RG
830 if (!nest)
831 {
9fcb758b 832 DR_BASE_OBJECT (dr) = DR_REF (dr);
02f5d6c5
RG
833 DR_ACCESS_FNS (dr) = NULL;
834 return;
835 }
836
c4ddde1b 837 ref = DR_REF (dr);
02f5d6c5 838 before_loop = block_before_loop (nest);
b8698a0f 839
9fcb758b
RG
840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841 into a two element array with a constant index. The base is
842 then just the immediate underlying object. */
843 if (TREE_CODE (ref) == REALPART_EXPR)
844 {
845 ref = TREE_OPERAND (ref, 0);
846 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
847 }
848 else if (TREE_CODE (ref) == IMAGPART_EXPR)
849 {
850 ref = TREE_OPERAND (ref, 0);
851 VEC_safe_push (tree, heap, access_fns, integer_one_node);
852 }
853
b8324815 854 /* Analyze access functions of dimensions we know to be independent. */
c4ddde1b 855 while (handled_component_p (ref))
86a07404 856 {
c4ddde1b 857 if (TREE_CODE (ref) == ARRAY_REF)
86a07404 858 {
c4ddde1b 859 op = TREE_OPERAND (ref, 1);
02f5d6c5
RG
860 access_fn = analyze_scalar_evolution (loop, op);
861 access_fn = instantiate_scev (before_loop, loop, access_fn);
862 VEC_safe_push (tree, heap, access_fns, access_fn);
b8324815 863 }
c4ddde1b
RG
864 else if (TREE_CODE (ref) == COMPONENT_REF
865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
866 {
867 /* For COMPONENT_REFs of records (but not unions!) use the
868 FIELD_DECL offset as constant access function so we can
869 disambiguate a[i].f1 and a[i].f2. */
870 tree off = component_ref_field_offset (ref);
871 off = size_binop (PLUS_EXPR,
872 size_binop (MULT_EXPR,
873 fold_convert (bitsizetype, off),
874 bitsize_int (BITS_PER_UNIT)),
875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 VEC_safe_push (tree, heap, access_fns, off);
877 }
878 else
879 /* If we have an unhandled component we could not translate
880 to an access function stop analyzing. We have determined
881 our base object in this case. */
882 break;
b8698a0f 883
c4ddde1b 884 ref = TREE_OPERAND (ref, 0);
86a07404
IR
885 }
886
8c330caa
RG
887 /* If the address operand of a MEM_REF base has an evolution in the
888 analyzed nest, add it as an additional independent access-function. */
c4ddde1b 889 if (TREE_CODE (ref) == MEM_REF)
86a07404 890 {
c4ddde1b 891 op = TREE_OPERAND (ref, 0);
3cb960c7 892 access_fn = analyze_scalar_evolution (loop, op);
a213b219 893 access_fn = instantiate_scev (before_loop, loop, access_fn);
8c330caa 894 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
2946bd34 895 {
0a500dd3 896 tree orig_type;
c4ddde1b 897 tree memoff = TREE_OPERAND (ref, 1);
8c330caa 898 base = initial_condition (access_fn);
0a500dd3
RG
899 orig_type = TREE_TYPE (base);
900 STRIP_USELESS_TYPE_CONVERSION (base);
8c330caa
RG
901 split_constant_offset (base, &base, &off);
902 /* Fold the MEM_REF offset into the evolutions initial
903 value to make more bases comparable. */
c4ddde1b 904 if (!integer_zerop (memoff))
8c330caa
RG
905 {
906 off = size_binop (PLUS_EXPR, off,
c4ddde1b
RG
907 fold_convert (ssizetype, memoff));
908 memoff = build_int_cst (TREE_TYPE (memoff), 0);
8c330caa
RG
909 }
910 access_fn = chrec_replace_initial_condition
0a500dd3 911 (access_fn, fold_convert (orig_type, off));
c4ddde1b
RG
912 /* ??? This is still not a suitable base object for
913 dr_may_alias_p - the base object needs to be an
914 access that covers the object as whole. With
915 an evolution in the pointer this cannot be
916 guaranteed.
917 As a band-aid, mark the access so we can special-case
918 it in dr_may_alias_p. */
919 ref = fold_build2_loc (EXPR_LOCATION (ref),
920 MEM_REF, TREE_TYPE (ref),
921 base, memoff);
922 DR_UNCONSTRAINED_BASE (dr) = true;
8c330caa 923 VEC_safe_push (tree, heap, access_fns, access_fn);
2946bd34 924 }
86a07404 925 }
c4ddde1b
RG
926 else if (DECL_P (ref))
927 {
928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
929 ref = build2 (MEM_REF, TREE_TYPE (ref),
930 build_fold_addr_expr (ref),
931 build_int_cst (reference_alias_ptr_type (ref), 0));
932 }
86a07404 933
3cb960c7
ZD
934 DR_BASE_OBJECT (dr) = ref;
935 DR_ACCESS_FNS (dr) = access_fns;
86a07404
IR
936}
937
3cb960c7 938/* Extracts the alias analysis information from the memory reference DR. */
86a07404 939
3cb960c7
ZD
940static void
941dr_analyze_alias (struct data_reference *dr)
86a07404 942{
3cb960c7 943 tree ref = DR_REF (dr);
5006671f
RG
944 tree base = get_base_address (ref), addr;
945
70f34814
RG
946 if (INDIRECT_REF_P (base)
947 || TREE_CODE (base) == MEM_REF)
3cb960c7
ZD
948 {
949 addr = TREE_OPERAND (base, 0);
950 if (TREE_CODE (addr) == SSA_NAME)
5006671f 951 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
3cb960c7 952 }
3cb960c7 953}
86a07404 954
3cb960c7 955/* Frees data reference DR. */
8fdbc9c6 956
dea61d92 957void
8fdbc9c6
ZD
958free_data_ref (data_reference_p dr)
959{
3cb960c7 960 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
8fdbc9c6
ZD
961 free (dr);
962}
86a07404 963
3cb960c7
ZD
964/* Analyzes memory reference MEMREF accessed in STMT. The reference
965 is read if IS_READ is true, write otherwise. Returns the
5c640e29
SP
966 data_reference description of MEMREF. NEST is the outermost loop
967 in which the reference should be instantiated, LOOP is the loop in
968 which the data reference should be analyzed. */
86a07404 969
5417e022 970struct data_reference *
5c640e29
SP
971create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 bool is_read)
86a07404 973{
3cb960c7 974 struct data_reference *dr;
0ff4040e 975
3cb960c7 976 if (dump_file && (dump_flags & TDF_DETAILS))
0ff4040e 977 {
3cb960c7
ZD
978 fprintf (dump_file, "Creating dr for ");
979 print_generic_expr (dump_file, memref, TDF_SLIM);
980 fprintf (dump_file, "\n");
0ff4040e 981 }
e2157b49 982
3cb960c7
ZD
983 dr = XCNEW (struct data_reference);
984 DR_STMT (dr) = stmt;
985 DR_REF (dr) = memref;
986 DR_IS_READ (dr) = is_read;
86a07404 987
4e4452b6 988 dr_analyze_innermost (dr, nest);
5c640e29 989 dr_analyze_indices (dr, nest, loop);
3cb960c7 990 dr_analyze_alias (dr);
86a07404
IR
991
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 {
b8324815 994 unsigned i;
3cb960c7 995 fprintf (dump_file, "\tbase_address: ");
86a07404
IR
996 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997 fprintf (dump_file, "\n\toffset from base address: ");
998 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999 fprintf (dump_file, "\n\tconstant offset from base address: ");
1000 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
86a07404
IR
1001 fprintf (dump_file, "\n\tstep: ");
1002 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
3cb960c7
ZD
1003 fprintf (dump_file, "\n\taligned to: ");
1004 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005 fprintf (dump_file, "\n\tbase_object: ");
1006 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
86a07404 1007 fprintf (dump_file, "\n");
b8324815
RG
1008 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1009 {
1010 fprintf (dump_file, "\tAccess function %d: ", i);
1011 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1012 }
3cb960c7
ZD
1013 }
1014
b8698a0f 1015 return dr;
86a07404
IR
1016}
1017
bfe068c3
IR
1018/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019 expressions. */
1020static bool
1021dr_equal_offsets_p1 (tree offset1, tree offset2)
1022{
1023 bool res;
1024
1025 STRIP_NOPS (offset1);
1026 STRIP_NOPS (offset2);
1027
1028 if (offset1 == offset2)
1029 return true;
1030
1031 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033 return false;
1034
1035 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036 TREE_OPERAND (offset2, 0));
1037
1038 if (!res || !BINARY_CLASS_P (offset1))
1039 return res;
1040
1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042 TREE_OPERAND (offset2, 1));
1043
1044 return res;
1045}
1046
1047/* Check if DRA and DRB have equal offsets. */
1048bool
1049dr_equal_offsets_p (struct data_reference *dra,
1050 struct data_reference *drb)
1051{
1052 tree offset1, offset2;
1053
1054 offset1 = DR_OFFSET (dra);
1055 offset2 = DR_OFFSET (drb);
1056
1057 return dr_equal_offsets_p1 (offset1, offset2);
1058}
1059
d93817c4
ZD
1060/* Returns true if FNA == FNB. */
1061
1062static bool
1063affine_function_equal_p (affine_fn fna, affine_fn fnb)
1064{
1065 unsigned i, n = VEC_length (tree, fna);
86a07404 1066
f86289d5
ZD
1067 if (n != VEC_length (tree, fnb))
1068 return false;
86df10e3 1069
d93817c4
ZD
1070 for (i = 0; i < n; i++)
1071 if (!operand_equal_p (VEC_index (tree, fna, i),
1072 VEC_index (tree, fnb, i), 0))
1073 return false;
1074
1075 return true;
1076}
1077
1078/* If all the functions in CF are the same, returns one of them,
1079 otherwise returns NULL. */
1080
1081static affine_fn
1082common_affine_function (conflict_function *cf)
86df10e3 1083{
d93817c4
ZD
1084 unsigned i;
1085 affine_fn comm;
1086
1087 if (!CF_NONTRIVIAL_P (cf))
1088 return NULL;
1089
1090 comm = cf->fns[0];
1091
1092 for (i = 1; i < cf->n; i++)
1093 if (!affine_function_equal_p (comm, cf->fns[i]))
1094 return NULL;
1095
1096 return comm;
1097}
86df10e3 1098
d93817c4
ZD
1099/* Returns the base of the affine function FN. */
1100
1101static tree
1102affine_function_base (affine_fn fn)
1103{
1104 return VEC_index (tree, fn, 0);
1105}
1106
1107/* Returns true if FN is a constant. */
1108
1109static bool
1110affine_function_constant_p (affine_fn fn)
1111{
1112 unsigned i;
1113 tree coef;
1114
1115 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1116 if (!integer_zerop (coef))
e2157b49
SP
1117 return false;
1118
86df10e3
SP
1119 return true;
1120}
1121
1baf2906
SP
1122/* Returns true if FN is the zero constant function. */
1123
1124static bool
1125affine_function_zero_p (affine_fn fn)
1126{
1127 return (integer_zerop (affine_function_base (fn))
1128 && affine_function_constant_p (fn));
1129}
1130
33b30201
SP
1131/* Returns a signed integer type with the largest precision from TA
1132 and TB. */
1133
1134static tree
1135signed_type_for_types (tree ta, tree tb)
1136{
1137 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1138 return signed_type_for (ta);
1139 else
1140 return signed_type_for (tb);
1141}
1142
d93817c4
ZD
1143/* Applies operation OP on affine functions FNA and FNB, and returns the
1144 result. */
1145
1146static affine_fn
1147affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1148{
1149 unsigned i, n, m;
1150 affine_fn ret;
1151 tree coef;
1152
1153 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1154 {
1155 n = VEC_length (tree, fna);
1156 m = VEC_length (tree, fnb);
1157 }
1158 else
1159 {
1160 n = VEC_length (tree, fnb);
1161 m = VEC_length (tree, fna);
1162 }
1163
1164 ret = VEC_alloc (tree, heap, m);
1165 for (i = 0; i < n; i++)
33b30201
SP
1166 {
1167 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1168 TREE_TYPE (VEC_index (tree, fnb, i)));
1169
1170 VEC_quick_push (tree, ret,
1171 fold_build2 (op, type,
b8698a0f 1172 VEC_index (tree, fna, i),
33b30201
SP
1173 VEC_index (tree, fnb, i)));
1174 }
d93817c4
ZD
1175
1176 for (; VEC_iterate (tree, fna, i, coef); i++)
1177 VEC_quick_push (tree, ret,
33b30201 1178 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
d93817c4
ZD
1179 coef, integer_zero_node));
1180 for (; VEC_iterate (tree, fnb, i, coef); i++)
1181 VEC_quick_push (tree, ret,
33b30201 1182 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
d93817c4
ZD
1183 integer_zero_node, coef));
1184
1185 return ret;
1186}
1187
1188/* Returns the sum of affine functions FNA and FNB. */
1189
1190static affine_fn
1191affine_fn_plus (affine_fn fna, affine_fn fnb)
1192{
1193 return affine_fn_op (PLUS_EXPR, fna, fnb);
1194}
1195
1196/* Returns the difference of affine functions FNA and FNB. */
1197
1198static affine_fn
1199affine_fn_minus (affine_fn fna, affine_fn fnb)
1200{
1201 return affine_fn_op (MINUS_EXPR, fna, fnb);
1202}
1203
1204/* Frees affine function FN. */
1205
1206static void
1207affine_fn_free (affine_fn fn)
1208{
1209 VEC_free (tree, heap, fn);
1210}
1211
86df10e3
SP
1212/* Determine for each subscript in the data dependence relation DDR
1213 the distance. */
56cf8686 1214
0ff4040e 1215static void
86df10e3 1216compute_subscript_distance (struct data_dependence_relation *ddr)
56cf8686 1217{
d93817c4
ZD
1218 conflict_function *cf_a, *cf_b;
1219 affine_fn fn_a, fn_b, diff;
1220
56cf8686
SP
1221 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1222 {
1223 unsigned int i;
b8698a0f 1224
56cf8686
SP
1225 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1226 {
56cf8686 1227 struct subscript *subscript;
b8698a0f 1228
56cf8686 1229 subscript = DDR_SUBSCRIPT (ddr, i);
d93817c4
ZD
1230 cf_a = SUB_CONFLICTS_IN_A (subscript);
1231 cf_b = SUB_CONFLICTS_IN_B (subscript);
86df10e3 1232
d93817c4
ZD
1233 fn_a = common_affine_function (cf_a);
1234 fn_b = common_affine_function (cf_b);
1235 if (!fn_a || !fn_b)
86df10e3 1236 {
d93817c4
ZD
1237 SUB_DISTANCE (subscript) = chrec_dont_know;
1238 return;
86df10e3 1239 }
d93817c4 1240 diff = affine_fn_minus (fn_a, fn_b);
b8698a0f 1241
d93817c4
ZD
1242 if (affine_function_constant_p (diff))
1243 SUB_DISTANCE (subscript) = affine_function_base (diff);
56cf8686
SP
1244 else
1245 SUB_DISTANCE (subscript) = chrec_dont_know;
d93817c4
ZD
1246
1247 affine_fn_free (diff);
56cf8686
SP
1248 }
1249 }
1250}
1251
d93817c4
ZD
1252/* Returns the conflict function for "unknown". */
1253
1254static conflict_function *
1255conflict_fn_not_known (void)
1256{
1257 conflict_function *fn = XCNEW (conflict_function);
1258 fn->n = NOT_KNOWN;
1259
1260 return fn;
1261}
1262
1263/* Returns the conflict function for "independent". */
1264
1265static conflict_function *
1266conflict_fn_no_dependence (void)
1267{
1268 conflict_function *fn = XCNEW (conflict_function);
1269 fn->n = NO_DEPENDENCE;
1270
1271 return fn;
1272}
1273
3cb960c7
ZD
1274/* Returns true if the address of OBJ is invariant in LOOP. */
1275
1276static bool
ed7a4b4b 1277object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
3cb960c7
ZD
1278{
1279 while (handled_component_p (obj))
1280 {
1281 if (TREE_CODE (obj) == ARRAY_REF)
1282 {
1283 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1284 need to check the stride and the lower bound of the reference. */
1285 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1286 loop->num)
1287 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1288 loop->num))
1289 return false;
1290 }
1291 else if (TREE_CODE (obj) == COMPONENT_REF)
1292 {
1293 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1294 loop->num))
1295 return false;
1296 }
1297 obj = TREE_OPERAND (obj, 0);
1298 }
1299
70f34814
RG
1300 if (!INDIRECT_REF_P (obj)
1301 && TREE_CODE (obj) != MEM_REF)
3cb960c7
ZD
1302 return true;
1303
1304 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1305 loop->num);
1306}
1307
3cb960c7 1308/* Returns false if we can prove that data references A and B do not alias,
02f5d6c5
RG
1309 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1310 considered. */
3cb960c7 1311
f8bf9252 1312bool
02f5d6c5
RG
1313dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1314 bool loop_nest)
3cb960c7 1315{
7d36e538
RG
1316 tree addr_a = DR_BASE_OBJECT (a);
1317 tree addr_b = DR_BASE_OBJECT (b);
3cb960c7 1318
02f5d6c5
RG
1319 /* If we are not processing a loop nest but scalar code we
1320 do not need to care about possible cross-iteration dependences
1321 and thus can process the full original reference. Do so,
1322 similar to how loop invariant motion applies extra offset-based
1323 disambiguation. */
1324 if (!loop_nest)
1325 {
1326 aff_tree off1, off2;
1327 double_int size1, size2;
1328 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1329 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1330 aff_combination_scale (&off1, double_int_minus_one);
1331 aff_combination_add (&off2, &off1);
1332 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1333 return false;
1334 }
1335
c4ddde1b
RG
1336 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1337 the size of the base-object. So we cannot do any offset/overlap
1338 based analysis but have to rely on points-to information only. */
1339 if (TREE_CODE (addr_a) == MEM_REF
1340 && DR_UNCONSTRAINED_BASE (a))
1341 {
1342 if (TREE_CODE (addr_b) == MEM_REF
1343 && DR_UNCONSTRAINED_BASE (b))
1344 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1345 TREE_OPERAND (addr_b, 0));
1346 else
1347 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1348 build_fold_addr_expr (addr_b));
1349 }
1350 else if (TREE_CODE (addr_b) == MEM_REF
1351 && DR_UNCONSTRAINED_BASE (b))
1352 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1353 TREE_OPERAND (addr_b, 0));
1354
1355 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1356 that is being subsetted in the loop nest. */
b0af49c4 1357 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
7d36e538 1358 return refs_output_dependent_p (addr_a, addr_b);
b0af49c4 1359 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
7d36e538
RG
1360 return refs_anti_dependent_p (addr_a, addr_b);
1361 return refs_may_alias_p (addr_a, addr_b);
3cb960c7
ZD
1362}
1363
0ff4040e
SP
1364/* Initialize a data dependence relation between data accesses A and
1365 B. NB_LOOPS is the number of loops surrounding the references: the
1366 size of the classic distance/direction vectors. */
56cf8686 1367
aec7ae7d 1368struct data_dependence_relation *
b8698a0f 1369initialize_data_dependence_relation (struct data_reference *a,
0ff4040e 1370 struct data_reference *b,
ba42e045 1371 VEC (loop_p, heap) *loop_nest)
56cf8686
SP
1372{
1373 struct data_dependence_relation *res;
0ff4040e 1374 unsigned int i;
b8698a0f 1375
5ed6ace5 1376 res = XNEW (struct data_dependence_relation);
56cf8686
SP
1377 DDR_A (res) = a;
1378 DDR_B (res) = b;
3ac57120 1379 DDR_LOOP_NEST (res) = NULL;
71d5b5e1 1380 DDR_REVERSED_P (res) = false;
2f470326
JJ
1381 DDR_SUBSCRIPTS (res) = NULL;
1382 DDR_DIR_VECTS (res) = NULL;
1383 DDR_DIST_VECTS (res) = NULL;
56cf8686 1384
86a07404
IR
1385 if (a == NULL || b == NULL)
1386 {
b8698a0f 1387 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404 1388 return res;
b8698a0f 1389 }
86a07404 1390
3cb960c7 1391 /* If the data references do not alias, then they are independent. */
02f5d6c5 1392 if (!dr_may_alias_p (a, b, loop_nest != NULL))
86a07404 1393 {
b8698a0f 1394 DDR_ARE_DEPENDENT (res) = chrec_known;
86a07404
IR
1395 return res;
1396 }
56cf8686 1397
fea99a37 1398 /* The case where the references are exactly the same. */
b3924be9
SP
1399 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1400 {
1a4571cb
RL
1401 if (loop_nest
1402 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1403 DR_BASE_OBJECT (a)))
1404 {
1405 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1406 return res;
1407 }
b3924be9
SP
1408 DDR_AFFINE_P (res) = true;
1409 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1410 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1411 DDR_LOOP_NEST (res) = loop_nest;
1412 DDR_INNER_LOOP (res) = 0;
1413 DDR_SELF_REFERENCE (res) = true;
1a4571cb
RL
1414 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1415 {
1416 struct subscript *subscript;
1417
1418 subscript = XNEW (struct subscript);
1419 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1420 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1421 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1422 SUB_DISTANCE (subscript) = chrec_dont_know;
1423 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1424 }
b3924be9
SP
1425 return res;
1426 }
1427
3cb960c7
ZD
1428 /* If the references do not access the same object, we do not know
1429 whether they alias or not. */
1430 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
56cf8686 1431 {
b8698a0f 1432 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404
IR
1433 return res;
1434 }
0ff4040e 1435
3cb960c7 1436 /* If the base of the object is not invariant in the loop nest, we cannot
0d52bcc1 1437 analyze it. TODO -- in fact, it would suffice to record that there may
c80b4100 1438 be arbitrary dependences in the loops where the base object varies. */
b8698a0f 1439 if (loop_nest
a70d6342
IR
1440 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1441 DR_BASE_OBJECT (a)))
86a07404 1442 {
b8698a0f 1443 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
86a07404
IR
1444 return res;
1445 }
3cb960c7 1446
19368333
RG
1447 /* If the number of dimensions of the access to not agree we can have
1448 a pointer access to a component of the array element type and an
1449 array access while the base-objects are still the same. Punt. */
1450 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1451 {
1452 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1453 return res;
1454 }
3cb960c7 1455
86a07404
IR
1456 DDR_AFFINE_P (res) = true;
1457 DDR_ARE_DEPENDENT (res) = NULL_TREE;
ebf78a47 1458 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
ba42e045 1459 DDR_LOOP_NEST (res) = loop_nest;
3d8864c0 1460 DDR_INNER_LOOP (res) = 0;
b3924be9 1461 DDR_SELF_REFERENCE (res) = false;
304afda6 1462
86a07404
IR
1463 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1464 {
1465 struct subscript *subscript;
b8698a0f 1466
5ed6ace5 1467 subscript = XNEW (struct subscript);
d93817c4
ZD
1468 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1469 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
86a07404
IR
1470 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1471 SUB_DISTANCE (subscript) = chrec_dont_know;
ebf78a47 1472 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
56cf8686 1473 }
ebf78a47 1474
56cf8686
SP
1475 return res;
1476}
1477
d93817c4
ZD
1478/* Frees memory used by the conflict function F. */
1479
1480static void
1481free_conflict_function (conflict_function *f)
1482{
1483 unsigned i;
1484
1485 if (CF_NONTRIVIAL_P (f))
1486 {
1487 for (i = 0; i < f->n; i++)
1488 affine_fn_free (f->fns[i]);
1489 }
1490 free (f);
1491}
1492
1493/* Frees memory used by SUBSCRIPTS. */
1494
1495static void
1496free_subscripts (VEC (subscript_p, heap) *subscripts)
1497{
1498 unsigned i;
1499 subscript_p s;
1500
ac47786e 1501 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
d93817c4
ZD
1502 {
1503 free_conflict_function (s->conflicting_iterations_in_a);
1504 free_conflict_function (s->conflicting_iterations_in_b);
a0044be5 1505 free (s);
d93817c4
ZD
1506 }
1507 VEC_free (subscript_p, heap, subscripts);
1508}
1509
56cf8686
SP
1510/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1511 description. */
1512
1513static inline void
b8698a0f 1514finalize_ddr_dependent (struct data_dependence_relation *ddr,
56cf8686
SP
1515 tree chrec)
1516{
36d59cf7
DB
1517 if (dump_file && (dump_flags & TDF_DETAILS))
1518 {
1519 fprintf (dump_file, "(dependence classified: ");
1520 print_generic_expr (dump_file, chrec, 0);
1521 fprintf (dump_file, ")\n");
1522 }
1523
b8698a0f 1524 DDR_ARE_DEPENDENT (ddr) = chrec;
d93817c4 1525 free_subscripts (DDR_SUBSCRIPTS (ddr));
2f470326 1526 DDR_SUBSCRIPTS (ddr) = NULL;
56cf8686
SP
1527}
1528
86df10e3
SP
1529/* The dependence relation DDR cannot be represented by a distance
1530 vector. */
1531
1532static inline void
1533non_affine_dependence_relation (struct data_dependence_relation *ddr)
1534{
1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1537
1538 DDR_AFFINE_P (ddr) = false;
1539}
1540
56cf8686
SP
1541\f
1542
1543/* This section contains the classic Banerjee tests. */
1544
1545/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1546 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1547
1548static inline bool
ed7a4b4b 1549ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
56cf8686
SP
1550{
1551 return (evolution_function_is_constant_p (chrec_a)
1552 && evolution_function_is_constant_p (chrec_b));
1553}
1554
1555/* Returns true iff CHREC_A and CHREC_B are dependent on an index
1556 variable, i.e., if the SIV (Single Index Variable) test is true. */
1557
1558static bool
ed7a4b4b 1559siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
56cf8686
SP
1560{
1561 if ((evolution_function_is_constant_p (chrec_a)
1562 && evolution_function_is_univariate_p (chrec_b))
1563 || (evolution_function_is_constant_p (chrec_b)
1564 && evolution_function_is_univariate_p (chrec_a)))
1565 return true;
b8698a0f 1566
56cf8686
SP
1567 if (evolution_function_is_univariate_p (chrec_a)
1568 && evolution_function_is_univariate_p (chrec_b))
1569 {
1570 switch (TREE_CODE (chrec_a))
1571 {
1572 case POLYNOMIAL_CHREC:
1573 switch (TREE_CODE (chrec_b))
1574 {
1575 case POLYNOMIAL_CHREC:
1576 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1577 return false;
b8698a0f 1578
56cf8686
SP
1579 default:
1580 return true;
1581 }
b8698a0f 1582
56cf8686
SP
1583 default:
1584 return true;
1585 }
1586 }
b8698a0f 1587
56cf8686
SP
1588 return false;
1589}
1590
d93817c4
ZD
1591/* Creates a conflict function with N dimensions. The affine functions
1592 in each dimension follow. */
1593
1594static conflict_function *
1595conflict_fn (unsigned n, ...)
1596{
1597 unsigned i;
1598 conflict_function *ret = XCNEW (conflict_function);
1599 va_list ap;
1600
b39c6706 1601 gcc_assert (0 < n && n <= MAX_DIM);
d93817c4 1602 va_start(ap, n);
b8698a0f 1603
d93817c4
ZD
1604 ret->n = n;
1605 for (i = 0; i < n; i++)
1606 ret->fns[i] = va_arg (ap, affine_fn);
1607 va_end(ap);
1608
1609 return ret;
1610}
1611
1612/* Returns constant affine function with value CST. */
1613
1614static affine_fn
1615affine_fn_cst (tree cst)
1616{
1617 affine_fn fn = VEC_alloc (tree, heap, 1);
1618 VEC_quick_push (tree, fn, cst);
1619 return fn;
1620}
1621
1622/* Returns affine function with single variable, CST + COEF * x_DIM. */
1623
1624static affine_fn
1625affine_fn_univar (tree cst, unsigned dim, tree coef)
1626{
1627 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1628 unsigned i;
1629
1630 gcc_assert (dim > 0);
1631 VEC_quick_push (tree, fn, cst);
1632 for (i = 1; i < dim; i++)
1633 VEC_quick_push (tree, fn, integer_zero_node);
1634 VEC_quick_push (tree, fn, coef);
1635 return fn;
1636}
1637
56cf8686
SP
1638/* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1639 *OVERLAPS_B are initialized to the functions that describe the
1640 relation between the elements accessed twice by CHREC_A and
1641 CHREC_B. For k >= 0, the following property is verified:
1642
1643 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1644
b8698a0f
L
1645static void
1646analyze_ziv_subscript (tree chrec_a,
1647 tree chrec_b,
d93817c4 1648 conflict_function **overlaps_a,
b8698a0f 1649 conflict_function **overlaps_b,
86df10e3 1650 tree *last_conflicts)
56cf8686 1651{
33b30201 1652 tree type, difference;
0ff4040e 1653 dependence_stats.num_ziv++;
b8698a0f 1654
56cf8686
SP
1655 if (dump_file && (dump_flags & TDF_DETAILS))
1656 fprintf (dump_file, "(analyze_ziv_subscript \n");
33b30201
SP
1657
1658 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
1659 chrec_a = chrec_convert (type, chrec_a, NULL);
1660 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 1661 difference = chrec_fold_minus (type, chrec_a, chrec_b);
b8698a0f 1662
56cf8686
SP
1663 switch (TREE_CODE (difference))
1664 {
1665 case INTEGER_CST:
1666 if (integer_zerop (difference))
1667 {
1668 /* The difference is equal to zero: the accessed index
1669 overlaps for each iteration in the loop. */
d93817c4
ZD
1670 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1671 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
86df10e3 1672 *last_conflicts = chrec_dont_know;
0ff4040e 1673 dependence_stats.num_ziv_dependent++;
56cf8686
SP
1674 }
1675 else
1676 {
1677 /* The accesses do not overlap. */
d93817c4
ZD
1678 *overlaps_a = conflict_fn_no_dependence ();
1679 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1680 *last_conflicts = integer_zero_node;
0ff4040e 1681 dependence_stats.num_ziv_independent++;
56cf8686
SP
1682 }
1683 break;
b8698a0f 1684
56cf8686 1685 default:
b8698a0f 1686 /* We're not sure whether the indexes overlap. For the moment,
56cf8686 1687 conservatively answer "don't know". */
0ff4040e
SP
1688 if (dump_file && (dump_flags & TDF_DETAILS))
1689 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1690
d93817c4
ZD
1691 *overlaps_a = conflict_fn_not_known ();
1692 *overlaps_b = conflict_fn_not_known ();
86df10e3 1693 *last_conflicts = chrec_dont_know;
0ff4040e 1694 dependence_stats.num_ziv_unimplemented++;
56cf8686
SP
1695 break;
1696 }
b8698a0f 1697
56cf8686
SP
1698 if (dump_file && (dump_flags & TDF_DETAILS))
1699 fprintf (dump_file, ")\n");
1700}
1701
b4a9343c 1702/* Similar to max_stmt_executions_int, but returns the bound as a tree,
4839cb59 1703 and only if it fits to the int type. If this is not the case, or the
b4a9343c 1704 bound on the number of iterations of LOOP could not be derived, returns
4839cb59
ZD
1705 chrec_dont_know. */
1706
1707static tree
b4a9343c 1708max_stmt_executions_tree (struct loop *loop)
4839cb59
ZD
1709{
1710 double_int nit;
4839cb59 1711
652c4c71 1712 if (!max_stmt_executions (loop, &nit))
4839cb59
ZD
1713 return chrec_dont_know;
1714
5f72e02d 1715 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
4839cb59
ZD
1716 return chrec_dont_know;
1717
5f72e02d 1718 return double_int_to_tree (unsigned_type_node, nit);
4839cb59
ZD
1719}
1720
5f1fab58
RG
1721/* Determine whether the CHREC is always positive/negative. If the expression
1722 cannot be statically analyzed, return false, otherwise set the answer into
1723 VALUE. */
1724
1725static bool
1726chrec_is_positive (tree chrec, bool *value)
1727{
1728 bool value0, value1, value2;
1729 tree end_value, nb_iter;
1730
1731 switch (TREE_CODE (chrec))
1732 {
1733 case POLYNOMIAL_CHREC:
1734 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1735 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1736 return false;
1737
1738 /* FIXME -- overflows. */
1739 if (value0 == value1)
1740 {
1741 *value = value0;
1742 return true;
1743 }
1744
1745 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1746 and the proof consists in showing that the sign never
1747 changes during the execution of the loop, from 0 to
1748 loop->nb_iterations. */
1749 if (!evolution_function_is_affine_p (chrec))
1750 return false;
1751
1752 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1753 if (chrec_contains_undetermined (nb_iter))
1754 return false;
1755
1756#if 0
1757 /* TODO -- If the test is after the exit, we may decrease the number of
1758 iterations by one. */
1759 if (after_exit)
1760 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1761#endif
1762
1763 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1764
1765 if (!chrec_is_positive (end_value, &value2))
1766 return false;
1767
1768 *value = value0;
1769 return value0 == value1;
1770
1771 case INTEGER_CST:
1772 switch (tree_int_cst_sgn (chrec))
1773 {
1774 case -1:
1775 *value = false;
1776 break;
1777 case 1:
1778 *value = true;
1779 break;
1780 default:
1781 return false;
1782 }
1783 return true;
1784
1785 default:
1786 return false;
1787 }
1788}
1789
1790
56cf8686
SP
1791/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1792 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1793 *OVERLAPS_B are initialized to the functions that describe the
1794 relation between the elements accessed twice by CHREC_A and
1795 CHREC_B. For k >= 0, the following property is verified:
1796
1797 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1798
1799static void
b8698a0f 1800analyze_siv_subscript_cst_affine (tree chrec_a,
56cf8686 1801 tree chrec_b,
b8698a0f
L
1802 conflict_function **overlaps_a,
1803 conflict_function **overlaps_b,
86df10e3 1804 tree *last_conflicts)
56cf8686
SP
1805{
1806 bool value0, value1, value2;
33b30201 1807 tree type, difference, tmp;
e2157b49 1808
33b30201 1809 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
1810 chrec_a = chrec_convert (type, chrec_a, NULL);
1811 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 1812 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
b8698a0f 1813
5f1fab58
RG
1814 /* Special case overlap in the first iteration. */
1815 if (integer_zerop (difference))
1816 {
1817 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1818 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1819 *last_conflicts = integer_one_node;
1820 return;
1821 }
1822
56cf8686
SP
1823 if (!chrec_is_positive (initial_condition (difference), &value0))
1824 {
0ff4040e 1825 if (dump_file && (dump_flags & TDF_DETAILS))
b8698a0f 1826 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
0ff4040e
SP
1827
1828 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
1829 *overlaps_a = conflict_fn_not_known ();
1830 *overlaps_b = conflict_fn_not_known ();
86df10e3 1831 *last_conflicts = chrec_dont_know;
56cf8686
SP
1832 return;
1833 }
1834 else
1835 {
1836 if (value0 == false)
1837 {
1838 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1839 {
0ff4040e
SP
1840 if (dump_file && (dump_flags & TDF_DETAILS))
1841 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1842
d93817c4 1843 *overlaps_a = conflict_fn_not_known ();
b8698a0f 1844 *overlaps_b = conflict_fn_not_known ();
86df10e3 1845 *last_conflicts = chrec_dont_know;
0ff4040e 1846 dependence_stats.num_siv_unimplemented++;
56cf8686
SP
1847 return;
1848 }
1849 else
1850 {
1851 if (value1 == true)
1852 {
b8698a0f 1853 /* Example:
56cf8686
SP
1854 chrec_a = 12
1855 chrec_b = {10, +, 1}
1856 */
b8698a0f 1857
f457cf40 1858 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
56cf8686 1859 {
4839cb59
ZD
1860 HOST_WIDE_INT numiter;
1861 struct loop *loop = get_chrec_loop (chrec_b);
416f403e 1862
d93817c4 1863 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
33b30201
SP
1864 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1865 fold_build1 (ABS_EXPR, type, difference),
d93817c4
ZD
1866 CHREC_RIGHT (chrec_b));
1867 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
86df10e3 1868 *last_conflicts = integer_one_node;
b8698a0f 1869
416f403e
DB
1870
1871 /* Perform weak-zero siv test to see if overlap is
1872 outside the loop bounds. */
652c4c71 1873 numiter = max_stmt_executions_int (loop);
416f403e 1874
4839cb59
ZD
1875 if (numiter >= 0
1876 && compare_tree_int (tmp, numiter) > 0)
416f403e 1877 {
d93817c4
ZD
1878 free_conflict_function (*overlaps_a);
1879 free_conflict_function (*overlaps_b);
1880 *overlaps_a = conflict_fn_no_dependence ();
1881 *overlaps_b = conflict_fn_no_dependence ();
416f403e 1882 *last_conflicts = integer_zero_node;
0ff4040e 1883 dependence_stats.num_siv_independent++;
416f403e 1884 return;
b8698a0f 1885 }
0ff4040e 1886 dependence_stats.num_siv_dependent++;
56cf8686
SP
1887 return;
1888 }
b8698a0f 1889
f457cf40 1890 /* When the step does not divide the difference, there are
56cf8686
SP
1891 no overlaps. */
1892 else
1893 {
d93817c4 1894 *overlaps_a = conflict_fn_no_dependence ();
b8698a0f 1895 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1896 *last_conflicts = integer_zero_node;
0ff4040e 1897 dependence_stats.num_siv_independent++;
56cf8686
SP
1898 return;
1899 }
1900 }
b8698a0f 1901
56cf8686
SP
1902 else
1903 {
b8698a0f 1904 /* Example:
56cf8686
SP
1905 chrec_a = 12
1906 chrec_b = {10, +, -1}
b8698a0f 1907
56cf8686 1908 In this case, chrec_a will not overlap with chrec_b. */
d93817c4
ZD
1909 *overlaps_a = conflict_fn_no_dependence ();
1910 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1911 *last_conflicts = integer_zero_node;
0ff4040e 1912 dependence_stats.num_siv_independent++;
56cf8686
SP
1913 return;
1914 }
1915 }
1916 }
b8698a0f 1917 else
56cf8686
SP
1918 {
1919 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1920 {
0ff4040e
SP
1921 if (dump_file && (dump_flags & TDF_DETAILS))
1922 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1923
d93817c4 1924 *overlaps_a = conflict_fn_not_known ();
b8698a0f 1925 *overlaps_b = conflict_fn_not_known ();
86df10e3 1926 *last_conflicts = chrec_dont_know;
0ff4040e 1927 dependence_stats.num_siv_unimplemented++;
56cf8686
SP
1928 return;
1929 }
1930 else
1931 {
1932 if (value2 == false)
1933 {
b8698a0f 1934 /* Example:
56cf8686
SP
1935 chrec_a = 3
1936 chrec_b = {10, +, -1}
1937 */
f457cf40 1938 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
56cf8686 1939 {
4839cb59
ZD
1940 HOST_WIDE_INT numiter;
1941 struct loop *loop = get_chrec_loop (chrec_b);
416f403e 1942
d93817c4 1943 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
33b30201 1944 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
d93817c4
ZD
1945 CHREC_RIGHT (chrec_b));
1946 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
86df10e3 1947 *last_conflicts = integer_one_node;
416f403e
DB
1948
1949 /* Perform weak-zero siv test to see if overlap is
1950 outside the loop bounds. */
652c4c71 1951 numiter = max_stmt_executions_int (loop);
416f403e 1952
4839cb59
ZD
1953 if (numiter >= 0
1954 && compare_tree_int (tmp, numiter) > 0)
416f403e 1955 {
d93817c4
ZD
1956 free_conflict_function (*overlaps_a);
1957 free_conflict_function (*overlaps_b);
1958 *overlaps_a = conflict_fn_no_dependence ();
1959 *overlaps_b = conflict_fn_no_dependence ();
416f403e 1960 *last_conflicts = integer_zero_node;
0ff4040e 1961 dependence_stats.num_siv_independent++;
416f403e 1962 return;
b8698a0f 1963 }
0ff4040e 1964 dependence_stats.num_siv_dependent++;
56cf8686
SP
1965 return;
1966 }
b8698a0f 1967
4286d8ce 1968 /* When the step does not divide the difference, there
56cf8686
SP
1969 are no overlaps. */
1970 else
1971 {
d93817c4 1972 *overlaps_a = conflict_fn_no_dependence ();
b8698a0f 1973 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1974 *last_conflicts = integer_zero_node;
0ff4040e 1975 dependence_stats.num_siv_independent++;
56cf8686
SP
1976 return;
1977 }
1978 }
1979 else
1980 {
b8698a0f
L
1981 /* Example:
1982 chrec_a = 3
56cf8686 1983 chrec_b = {4, +, 1}
b8698a0f 1984
56cf8686 1985 In this case, chrec_a will not overlap with chrec_b. */
d93817c4
ZD
1986 *overlaps_a = conflict_fn_no_dependence ();
1987 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 1988 *last_conflicts = integer_zero_node;
0ff4040e 1989 dependence_stats.num_siv_independent++;
56cf8686
SP
1990 return;
1991 }
1992 }
1993 }
1994 }
1995}
1996
50300b4c 1997/* Helper recursive function for initializing the matrix A. Returns
86df10e3 1998 the initial value of CHREC. */
56cf8686 1999
5b78fc3e 2000static tree
86df10e3
SP
2001initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2002{
2003 gcc_assert (chrec);
2004
5b78fc3e
JS
2005 switch (TREE_CODE (chrec))
2006 {
2007 case POLYNOMIAL_CHREC:
2008 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2009
2010 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2011 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2012
2013 case PLUS_EXPR:
2014 case MULT_EXPR:
2015 case MINUS_EXPR:
2016 {
2017 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2018 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2019
2020 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2021 }
2022
2023 case NOP_EXPR:
2024 {
2025 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
726a989a 2026 return chrec_convert (chrec_type (chrec), op, NULL);
5b78fc3e
JS
2027 }
2028
418df9d7
JJ
2029 case BIT_NOT_EXPR:
2030 {
2031 /* Handle ~X as -1 - X. */
2032 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2033 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2034 build_int_cst (TREE_TYPE (chrec), -1), op);
2035 }
2036
5b78fc3e
JS
2037 case INTEGER_CST:
2038 return chrec;
a1a5996d 2039
5b78fc3e
JS
2040 default:
2041 gcc_unreachable ();
2042 return NULL_TREE;
2043 }
86df10e3
SP
2044}
2045
2046#define FLOOR_DIV(x,y) ((x) / (y))
2047
b8698a0f 2048/* Solves the special case of the Diophantine equation:
86df10e3
SP
2049 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2050
2051 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2052 number of iterations that loops X and Y run. The overlaps will be
2053 constructed as evolutions in dimension DIM. */
56cf8686
SP
2054
2055static void
b8698a0f 2056compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
d93817c4 2057 affine_fn *overlaps_a,
b8698a0f 2058 affine_fn *overlaps_b,
86df10e3
SP
2059 tree *last_conflicts, int dim)
2060{
2061 if (((step_a > 0 && step_b > 0)
2062 || (step_a < 0 && step_b < 0)))
2063 {
2064 int step_overlaps_a, step_overlaps_b;
2065 int gcd_steps_a_b, last_conflict, tau2;
2066
2067 gcd_steps_a_b = gcd (step_a, step_b);
2068 step_overlaps_a = step_b / gcd_steps_a_b;
2069 step_overlaps_b = step_a / gcd_steps_a_b;
2070
2c26cbfd
SP
2071 if (niter > 0)
2072 {
2073 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2074 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2075 last_conflict = tau2;
2076 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2077 }
2078 else
2079 *last_conflicts = chrec_dont_know;
86df10e3 2080
b8698a0f 2081 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
d93817c4
ZD
2082 build_int_cst (NULL_TREE,
2083 step_overlaps_a));
b8698a0f
L
2084 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2085 build_int_cst (NULL_TREE,
d93817c4 2086 step_overlaps_b));
86df10e3
SP
2087 }
2088
2089 else
2090 {
d93817c4
ZD
2091 *overlaps_a = affine_fn_cst (integer_zero_node);
2092 *overlaps_b = affine_fn_cst (integer_zero_node);
86df10e3
SP
2093 *last_conflicts = integer_zero_node;
2094 }
2095}
2096
86df10e3
SP
2097/* Solves the special case of a Diophantine equation where CHREC_A is
2098 an affine bivariate function, and CHREC_B is an affine univariate
b8698a0f 2099 function. For example,
86df10e3
SP
2100
2101 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
b8698a0f
L
2102
2103 has the following overlapping functions:
86df10e3
SP
2104
2105 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2106 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2107 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2108
35fd3193 2109 FORNOW: This is a specialized implementation for a case occurring in
86df10e3
SP
2110 a common benchmark. Implement the general algorithm. */
2111
2112static void
b8698a0f 2113compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
d93817c4 2114 conflict_function **overlaps_a,
b8698a0f 2115 conflict_function **overlaps_b,
86df10e3 2116 tree *last_conflicts)
56cf8686 2117{
86df10e3
SP
2118 bool xz_p, yz_p, xyz_p;
2119 int step_x, step_y, step_z;
4839cb59 2120 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
d93817c4
ZD
2121 affine_fn overlaps_a_xz, overlaps_b_xz;
2122 affine_fn overlaps_a_yz, overlaps_b_yz;
2123 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2124 affine_fn ova1, ova2, ovb;
2125 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
86df10e3 2126
6b6fa4e9
SP
2127 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2128 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2129 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
86df10e3 2130
652c4c71
RG
2131 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2132 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2133 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
b8698a0f 2134
4839cb59 2135 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
86df10e3 2136 {
0ff4040e
SP
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2138 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
b8698a0f 2139
d93817c4
ZD
2140 *overlaps_a = conflict_fn_not_known ();
2141 *overlaps_b = conflict_fn_not_known ();
86df10e3
SP
2142 *last_conflicts = chrec_dont_know;
2143 return;
2144 }
2145
86df10e3
SP
2146 niter = MIN (niter_x, niter_z);
2147 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2148 &overlaps_a_xz,
2149 &overlaps_b_xz,
2150 &last_conflicts_xz, 1);
2151 niter = MIN (niter_y, niter_z);
2152 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2153 &overlaps_a_yz,
2154 &overlaps_b_yz,
2155 &last_conflicts_yz, 2);
2156 niter = MIN (niter_x, niter_z);
2157 niter = MIN (niter_y, niter);
2158 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2159 &overlaps_a_xyz,
2160 &overlaps_b_xyz,
2161 &last_conflicts_xyz, 3);
2162
2163 xz_p = !integer_zerop (last_conflicts_xz);
2164 yz_p = !integer_zerop (last_conflicts_yz);
2165 xyz_p = !integer_zerop (last_conflicts_xyz);
2166
2167 if (xz_p || yz_p || xyz_p)
2168 {
d93817c4
ZD
2169 ova1 = affine_fn_cst (integer_zero_node);
2170 ova2 = affine_fn_cst (integer_zero_node);
2171 ovb = affine_fn_cst (integer_zero_node);
86df10e3
SP
2172 if (xz_p)
2173 {
d93817c4
ZD
2174 affine_fn t0 = ova1;
2175 affine_fn t2 = ovb;
2176
2177 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2178 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2179 affine_fn_free (t0);
2180 affine_fn_free (t2);
86df10e3
SP
2181 *last_conflicts = last_conflicts_xz;
2182 }
2183 if (yz_p)
2184 {
d93817c4
ZD
2185 affine_fn t0 = ova2;
2186 affine_fn t2 = ovb;
2187
2188 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2189 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2190 affine_fn_free (t0);
2191 affine_fn_free (t2);
86df10e3
SP
2192 *last_conflicts = last_conflicts_yz;
2193 }
2194 if (xyz_p)
2195 {
d93817c4
ZD
2196 affine_fn t0 = ova1;
2197 affine_fn t2 = ova2;
2198 affine_fn t4 = ovb;
2199
2200 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2201 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2202 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2203 affine_fn_free (t0);
2204 affine_fn_free (t2);
2205 affine_fn_free (t4);
86df10e3
SP
2206 *last_conflicts = last_conflicts_xyz;
2207 }
d93817c4
ZD
2208 *overlaps_a = conflict_fn (2, ova1, ova2);
2209 *overlaps_b = conflict_fn (1, ovb);
86df10e3
SP
2210 }
2211 else
2212 {
d93817c4
ZD
2213 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2214 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
86df10e3
SP
2215 *last_conflicts = integer_zero_node;
2216 }
d93817c4
ZD
2217
2218 affine_fn_free (overlaps_a_xz);
2219 affine_fn_free (overlaps_b_xz);
2220 affine_fn_free (overlaps_a_yz);
2221 affine_fn_free (overlaps_b_yz);
2222 affine_fn_free (overlaps_a_xyz);
2223 affine_fn_free (overlaps_b_xyz);
56cf8686
SP
2224}
2225
b305e3da
SP
2226/* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2227
2228static void
2229lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2230 int size)
2231{
2232 memcpy (vec2, vec1, size * sizeof (*vec1));
2233}
2234
2235/* Copy the elements of M x N matrix MAT1 to MAT2. */
2236
2237static void
2238lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2239 int m, int n)
2240{
2241 int i;
2242
2243 for (i = 0; i < m; i++)
2244 lambda_vector_copy (mat1[i], mat2[i], n);
2245}
2246
2247/* Store the N x N identity matrix in MAT. */
2248
2249static void
2250lambda_matrix_id (lambda_matrix mat, int size)
2251{
2252 int i, j;
2253
2254 for (i = 0; i < size; i++)
2255 for (j = 0; j < size; j++)
2256 mat[i][j] = (i == j) ? 1 : 0;
2257}
2258
2259/* Return the first nonzero element of vector VEC1 between START and N.
2260 We must have START <= N. Returns N if VEC1 is the zero vector. */
2261
2262static int
2263lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2264{
2265 int j = start;
2266 while (j < n && vec1[j] == 0)
2267 j++;
2268 return j;
2269}
2270
2271/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2272 R2 = R2 + CONST1 * R1. */
2273
2274static void
2275lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2276{
2277 int i;
2278
2279 if (const1 == 0)
2280 return;
2281
2282 for (i = 0; i < n; i++)
2283 mat[r2][i] += const1 * mat[r1][i];
2284}
2285
2286/* Swap rows R1 and R2 in matrix MAT. */
2287
2288static void
2289lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2290{
2291 lambda_vector row;
2292
2293 row = mat[r1];
2294 mat[r1] = mat[r2];
2295 mat[r2] = row;
2296}
2297
2298/* Multiply vector VEC1 of length SIZE by a constant CONST1,
2299 and store the result in VEC2. */
2300
2301static void
2302lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2303 int size, int const1)
2304{
2305 int i;
2306
2307 if (const1 == 0)
2308 lambda_vector_clear (vec2, size);
2309 else
2310 for (i = 0; i < size; i++)
2311 vec2[i] = const1 * vec1[i];
2312}
2313
2314/* Negate vector VEC1 with length SIZE and store it in VEC2. */
2315
2316static void
2317lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2318 int size)
2319{
2320 lambda_vector_mult_const (vec1, vec2, size, -1);
2321}
2322
2323/* Negate row R1 of matrix MAT which has N columns. */
2324
2325static void
2326lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2327{
2328 lambda_vector_negate (mat[r1], mat[r1], n);
2329}
2330
2331/* Return true if two vectors are equal. */
2332
2333static bool
2334lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2335{
2336 int i;
2337 for (i = 0; i < size; i++)
2338 if (vec1[i] != vec2[i])
2339 return false;
2340 return true;
2341}
2342
2343/* Given an M x N integer matrix A, this function determines an M x
2344 M unimodular matrix U, and an M x N echelon matrix S such that
2345 "U.A = S". This decomposition is also known as "right Hermite".
2346
2347 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2348 Restructuring Compilers" Utpal Banerjee. */
2349
2350static void
2351lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2352 lambda_matrix S, lambda_matrix U)
2353{
2354 int i, j, i0 = 0;
2355
2356 lambda_matrix_copy (A, S, m, n);
2357 lambda_matrix_id (U, m);
2358
2359 for (j = 0; j < n; j++)
2360 {
2361 if (lambda_vector_first_nz (S[j], m, i0) < m)
2362 {
2363 ++i0;
2364 for (i = m - 1; i >= i0; i--)
2365 {
2366 while (S[i][j] != 0)
2367 {
2368 int sigma, factor, a, b;
2369
2370 a = S[i-1][j];
2371 b = S[i][j];
2372 sigma = (a * b < 0) ? -1: 1;
2373 a = abs (a);
2374 b = abs (b);
2375 factor = sigma * (a / b);
2376
2377 lambda_matrix_row_add (S, n, i, i-1, -factor);
2378 lambda_matrix_row_exchange (S, i, i-1);
2379
2380 lambda_matrix_row_add (U, m, i, i-1, -factor);
2381 lambda_matrix_row_exchange (U, i, i-1);
2382 }
2383 }
2384 }
2385 }
2386}
2387
56cf8686 2388/* Determines the overlapping elements due to accesses CHREC_A and
0ff4040e
SP
2389 CHREC_B, that are affine functions. This function cannot handle
2390 symbolic evolution functions, ie. when initial conditions are
2391 parameters, because it uses lambda matrices of integers. */
56cf8686
SP
2392
2393static void
b8698a0f 2394analyze_subscript_affine_affine (tree chrec_a,
56cf8686 2395 tree chrec_b,
b8698a0f
L
2396 conflict_function **overlaps_a,
2397 conflict_function **overlaps_b,
86df10e3 2398 tree *last_conflicts)
56cf8686 2399{
86df10e3 2400 unsigned nb_vars_a, nb_vars_b, dim;
fd727b34 2401 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
86df10e3 2402 lambda_matrix A, U, S;
f873b205 2403 struct obstack scratch_obstack;
86df10e3 2404
e2157b49 2405 if (eq_evolutions_p (chrec_a, chrec_b))
416f403e 2406 {
e2157b49
SP
2407 /* The accessed index overlaps for each iteration in the
2408 loop. */
d93817c4
ZD
2409 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2410 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
416f403e
DB
2411 *last_conflicts = chrec_dont_know;
2412 return;
2413 }
56cf8686
SP
2414 if (dump_file && (dump_flags & TDF_DETAILS))
2415 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
b8698a0f 2416
56cf8686
SP
2417 /* For determining the initial intersection, we have to solve a
2418 Diophantine equation. This is the most time consuming part.
b8698a0f 2419
56cf8686
SP
2420 For answering to the question: "Is there a dependence?" we have
2421 to prove that there exists a solution to the Diophantine
2422 equation, and that the solution is in the iteration domain,
89dbed81 2423 i.e. the solution is positive or zero, and that the solution
56cf8686
SP
2424 happens before the upper bound loop.nb_iterations. Otherwise
2425 there is no dependence. This function outputs a description of
2426 the iterations that hold the intersections. */
2427
86df10e3
SP
2428 nb_vars_a = nb_vars_in_chrec (chrec_a);
2429 nb_vars_b = nb_vars_in_chrec (chrec_b);
2430
f873b205
LB
2431 gcc_obstack_init (&scratch_obstack);
2432
86df10e3 2433 dim = nb_vars_a + nb_vars_b;
f873b205
LB
2434 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2435 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2436 S = lambda_matrix_new (dim, 1, &scratch_obstack);
86df10e3 2437
5b78fc3e
JS
2438 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2439 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
86df10e3
SP
2440 gamma = init_b - init_a;
2441
2442 /* Don't do all the hard work of solving the Diophantine equation
b8698a0f 2443 when we already know the solution: for example,
86df10e3
SP
2444 | {3, +, 1}_1
2445 | {3, +, 4}_2
2446 | gamma = 3 - 3 = 0.
b8698a0f 2447 Then the first overlap occurs during the first iterations:
86df10e3
SP
2448 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2449 */
2450 if (gamma == 0)
56cf8686 2451 {
86df10e3 2452 if (nb_vars_a == 1 && nb_vars_b == 1)
56cf8686 2453 {
fd727b34 2454 HOST_WIDE_INT step_a, step_b;
4839cb59 2455 HOST_WIDE_INT niter, niter_a, niter_b;
d93817c4 2456 affine_fn ova, ovb;
86df10e3 2457
652c4c71
RG
2458 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2459 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
86df10e3 2460 niter = MIN (niter_a, niter_b);
6b6fa4e9
SP
2461 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2462 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
86df10e3 2463
b8698a0f
L
2464 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2465 &ova, &ovb,
86df10e3 2466 last_conflicts, 1);
d93817c4
ZD
2467 *overlaps_a = conflict_fn (1, ova);
2468 *overlaps_b = conflict_fn (1, ovb);
56cf8686 2469 }
86df10e3
SP
2470
2471 else if (nb_vars_a == 2 && nb_vars_b == 1)
2472 compute_overlap_steps_for_affine_1_2
2473 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2474
2475 else if (nb_vars_a == 1 && nb_vars_b == 2)
2476 compute_overlap_steps_for_affine_1_2
2477 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2478
2479 else
56cf8686 2480 {
0ff4040e
SP
2481 if (dump_file && (dump_flags & TDF_DETAILS))
2482 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
d93817c4
ZD
2483 *overlaps_a = conflict_fn_not_known ();
2484 *overlaps_b = conflict_fn_not_known ();
86df10e3 2485 *last_conflicts = chrec_dont_know;
56cf8686 2486 }
0ff4040e 2487 goto end_analyze_subs_aa;
86df10e3
SP
2488 }
2489
2490 /* U.A = S */
2491 lambda_matrix_right_hermite (A, dim, 1, S, U);
2492
2493 if (S[0][0] < 0)
2494 {
2495 S[0][0] *= -1;
2496 lambda_matrix_row_negate (U, dim, 0);
2497 }
2498 gcd_alpha_beta = S[0][0];
2499
ba42e045
SP
2500 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2501 but that is a quite strange case. Instead of ICEing, answer
2502 don't know. */
2503 if (gcd_alpha_beta == 0)
2504 {
d93817c4
ZD
2505 *overlaps_a = conflict_fn_not_known ();
2506 *overlaps_b = conflict_fn_not_known ();
ba42e045
SP
2507 *last_conflicts = chrec_dont_know;
2508 goto end_analyze_subs_aa;
2509 }
2510
86df10e3
SP
2511 /* The classic "gcd-test". */
2512 if (!int_divides_p (gcd_alpha_beta, gamma))
2513 {
2514 /* The "gcd-test" has determined that there is no integer
2515 solution, i.e. there is no dependence. */
d93817c4
ZD
2516 *overlaps_a = conflict_fn_no_dependence ();
2517 *overlaps_b = conflict_fn_no_dependence ();
86df10e3
SP
2518 *last_conflicts = integer_zero_node;
2519 }
2520
2521 /* Both access functions are univariate. This includes SIV and MIV cases. */
2522 else if (nb_vars_a == 1 && nb_vars_b == 1)
2523 {
2524 /* Both functions should have the same evolution sign. */
2525 if (((A[0][0] > 0 && -A[1][0] > 0)
2526 || (A[0][0] < 0 && -A[1][0] < 0)))
56cf8686
SP
2527 {
2528 /* The solutions are given by:
b8698a0f 2529 |
86df10e3
SP
2530 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2531 | [u21 u22] [y0]
b8698a0f 2532
56cf8686 2533 For a given integer t. Using the following variables,
b8698a0f 2534
56cf8686
SP
2535 | i0 = u11 * gamma / gcd_alpha_beta
2536 | j0 = u12 * gamma / gcd_alpha_beta
2537 | i1 = u21
2538 | j1 = u22
b8698a0f 2539
56cf8686 2540 the solutions are:
b8698a0f
L
2541
2542 | x0 = i0 + i1 * t,
86df10e3 2543 | y0 = j0 + j1 * t. */
2c26cbfd 2544 HOST_WIDE_INT i0, j0, i1, j1;
86df10e3
SP
2545
2546 i0 = U[0][0] * gamma / gcd_alpha_beta;
2547 j0 = U[0][1] * gamma / gcd_alpha_beta;
2548 i1 = U[1][0];
2549 j1 = U[1][1];
2550
2551 if ((i1 == 0 && i0 < 0)
2552 || (j1 == 0 && j0 < 0))
56cf8686 2553 {
b8698a0f
L
2554 /* There is no solution.
2555 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2556 falls in here, but for the moment we don't look at the
56cf8686 2557 upper bound of the iteration domain. */
d93817c4
ZD
2558 *overlaps_a = conflict_fn_no_dependence ();
2559 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2560 *last_conflicts = integer_zero_node;
2c26cbfd 2561 goto end_analyze_subs_aa;
86df10e3
SP
2562 }
2563
2c26cbfd 2564 if (i1 > 0 && j1 > 0)
56cf8686 2565 {
652c4c71
RG
2566 HOST_WIDE_INT niter_a
2567 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2568 HOST_WIDE_INT niter_b
2569 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2c26cbfd
SP
2570 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2571
2572 /* (X0, Y0) is a solution of the Diophantine equation:
2573 "chrec_a (X0) = chrec_b (Y0)". */
2574 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2575 CEIL (-j0, j1));
2576 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2577 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2578
2579 /* (X1, Y1) is the smallest positive solution of the eq
2580 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2581 first conflict occurs. */
2582 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2583 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2584 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2585
2586 if (niter > 0)
56cf8686 2587 {
2c26cbfd
SP
2588 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2589 FLOOR_DIV (niter - j0, j1));
2590 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
86df10e3 2591
2c26cbfd
SP
2592 /* If the overlap occurs outside of the bounds of the
2593 loop, there is no dependence. */
9e517d61 2594 if (x1 >= niter || y1 >= niter)
56cf8686 2595 {
2c26cbfd
SP
2596 *overlaps_a = conflict_fn_no_dependence ();
2597 *overlaps_b = conflict_fn_no_dependence ();
2598 *last_conflicts = integer_zero_node;
2599 goto end_analyze_subs_aa;
56cf8686
SP
2600 }
2601 else
2c26cbfd 2602 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
56cf8686 2603 }
56cf8686 2604 else
2c26cbfd
SP
2605 *last_conflicts = chrec_dont_know;
2606
2607 *overlaps_a
2608 = conflict_fn (1,
2609 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2610 1,
2611 build_int_cst (NULL_TREE, i1)));
2612 *overlaps_b
2613 = conflict_fn (1,
2614 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2615 1,
2616 build_int_cst (NULL_TREE, j1)));
2617 }
2618 else
2619 {
2620 /* FIXME: For the moment, the upper bound of the
2621 iteration domain for i and j is not checked. */
2622 if (dump_file && (dump_flags & TDF_DETAILS))
2623 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2624 *overlaps_a = conflict_fn_not_known ();
2625 *overlaps_b = conflict_fn_not_known ();
2626 *last_conflicts = chrec_dont_know;
56cf8686
SP
2627 }
2628 }
86df10e3
SP
2629 else
2630 {
0ff4040e
SP
2631 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
d93817c4
ZD
2633 *overlaps_a = conflict_fn_not_known ();
2634 *overlaps_b = conflict_fn_not_known ();
86df10e3
SP
2635 *last_conflicts = chrec_dont_know;
2636 }
56cf8686 2637 }
56cf8686
SP
2638 else
2639 {
0ff4040e
SP
2640 if (dump_file && (dump_flags & TDF_DETAILS))
2641 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
d93817c4
ZD
2642 *overlaps_a = conflict_fn_not_known ();
2643 *overlaps_b = conflict_fn_not_known ();
86df10e3 2644 *last_conflicts = chrec_dont_know;
56cf8686 2645 }
86df10e3 2646
b8698a0f 2647end_analyze_subs_aa:
f873b205 2648 obstack_free (&scratch_obstack, NULL);
56cf8686
SP
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2650 {
2651 fprintf (dump_file, " (overlaps_a = ");
d93817c4 2652 dump_conflict_function (dump_file, *overlaps_a);
56cf8686 2653 fprintf (dump_file, ")\n (overlaps_b = ");
d93817c4 2654 dump_conflict_function (dump_file, *overlaps_b);
56cf8686 2655 fprintf (dump_file, ")\n");
0ff4040e 2656 fprintf (dump_file, ")\n");
56cf8686 2657 }
0ff4040e
SP
2658}
2659
2660/* Returns true when analyze_subscript_affine_affine can be used for
2661 determining the dependence relation between chrec_a and chrec_b,
2662 that contain symbols. This function modifies chrec_a and chrec_b
2663 such that the analysis result is the same, and such that they don't
b8698a0f 2664 contain symbols, and then can safely be passed to the analyzer.
0ff4040e
SP
2665
2666 Example: The analysis of the following tuples of evolutions produce
2667 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2668 vs. {0, +, 1}_1
b8698a0f 2669
0ff4040e
SP
2670 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2671 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2672*/
2673
2674static bool
2675can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2676{
16a2acea 2677 tree diff, type, left_a, left_b, right_b;
0ff4040e
SP
2678
2679 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2680 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2681 /* FIXME: For the moment not handled. Might be refined later. */
2682 return false;
2683
16a2acea
SP
2684 type = chrec_type (*chrec_a);
2685 left_a = CHREC_LEFT (*chrec_a);
726a989a 2686 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
16a2acea
SP
2687 diff = chrec_fold_minus (type, left_a, left_b);
2688
0ff4040e
SP
2689 if (!evolution_function_is_constant_p (diff))
2690 return false;
2691
56cf8686 2692 if (dump_file && (dump_flags & TDF_DETAILS))
0ff4040e
SP
2693 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2694
b8698a0f 2695 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
0ff4040e 2696 diff, CHREC_RIGHT (*chrec_a));
726a989a 2697 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
0ff4040e 2698 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
dc61cc6b 2699 build_int_cst (type, 0),
16a2acea 2700 right_b);
0ff4040e 2701 return true;
56cf8686
SP
2702}
2703
2704/* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2705 *OVERLAPS_B are initialized to the functions that describe the
2706 relation between the elements accessed twice by CHREC_A and
2707 CHREC_B. For k >= 0, the following property is verified:
2708
2709 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2710
2711static void
b8698a0f 2712analyze_siv_subscript (tree chrec_a,
56cf8686 2713 tree chrec_b,
b8698a0f
L
2714 conflict_function **overlaps_a,
2715 conflict_function **overlaps_b,
5b78fc3e
JS
2716 tree *last_conflicts,
2717 int loop_nest_num)
56cf8686 2718{
0ff4040e 2719 dependence_stats.num_siv++;
b8698a0f 2720
56cf8686
SP
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2722 fprintf (dump_file, "(analyze_siv_subscript \n");
b8698a0f 2723
56cf8686 2724 if (evolution_function_is_constant_p (chrec_a)
5b78fc3e 2725 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
b8698a0f 2726 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
86df10e3 2727 overlaps_a, overlaps_b, last_conflicts);
b8698a0f 2728
5b78fc3e 2729 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
56cf8686 2730 && evolution_function_is_constant_p (chrec_b))
b8698a0f 2731 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
86df10e3 2732 overlaps_b, overlaps_a, last_conflicts);
b8698a0f 2733
5b78fc3e
JS
2734 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2735 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
0ff4040e
SP
2736 {
2737 if (!chrec_contains_symbols (chrec_a)
2738 && !chrec_contains_symbols (chrec_b))
2739 {
b8698a0f
L
2740 analyze_subscript_affine_affine (chrec_a, chrec_b,
2741 overlaps_a, overlaps_b,
0ff4040e
SP
2742 last_conflicts);
2743
d93817c4
ZD
2744 if (CF_NOT_KNOWN_P (*overlaps_a)
2745 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2746 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
2747 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2748 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2749 dependence_stats.num_siv_independent++;
2750 else
2751 dependence_stats.num_siv_dependent++;
2752 }
b8698a0f 2753 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
0ff4040e
SP
2754 &chrec_b))
2755 {
b8698a0f
L
2756 analyze_subscript_affine_affine (chrec_a, chrec_b,
2757 overlaps_a, overlaps_b,
0ff4040e 2758 last_conflicts);
0ff4040e 2759
d93817c4
ZD
2760 if (CF_NOT_KNOWN_P (*overlaps_a)
2761 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2762 dependence_stats.num_siv_unimplemented++;
d93817c4
ZD
2763 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2764 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2765 dependence_stats.num_siv_independent++;
2766 else
2767 dependence_stats.num_siv_dependent++;
2768 }
2769 else
2770 goto siv_subscript_dontknow;
2771 }
2772
56cf8686
SP
2773 else
2774 {
0ff4040e
SP
2775 siv_subscript_dontknow:;
2776 if (dump_file && (dump_flags & TDF_DETAILS))
2777 fprintf (dump_file, "siv test failed: unimplemented.\n");
d93817c4
ZD
2778 *overlaps_a = conflict_fn_not_known ();
2779 *overlaps_b = conflict_fn_not_known ();
86df10e3 2780 *last_conflicts = chrec_dont_know;
0ff4040e 2781 dependence_stats.num_siv_unimplemented++;
56cf8686 2782 }
b8698a0f 2783
56cf8686
SP
2784 if (dump_file && (dump_flags & TDF_DETAILS))
2785 fprintf (dump_file, ")\n");
2786}
2787
55a700ac
ZD
2788/* Returns false if we can prove that the greatest common divisor of the steps
2789 of CHREC does not divide CST, false otherwise. */
56cf8686
SP
2790
2791static bool
ed7a4b4b 2792gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
56cf8686 2793{
55a700ac
ZD
2794 HOST_WIDE_INT cd = 0, val;
2795 tree step;
0ff4040e 2796
55a700ac
ZD
2797 if (!host_integerp (cst, 0))
2798 return true;
2799 val = tree_low_cst (cst, 0);
2800
2801 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2802 {
2803 step = CHREC_RIGHT (chrec);
2804 if (!host_integerp (step, 0))
2805 return true;
2806 cd = gcd (cd, tree_low_cst (step, 0));
2807 chrec = CHREC_LEFT (chrec);
56cf8686 2808 }
55a700ac
ZD
2809
2810 return val % cd == 0;
56cf8686
SP
2811}
2812
da9a21f4
SP
2813/* Analyze a MIV (Multiple Index Variable) subscript with respect to
2814 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2815 functions that describe the relation between the elements accessed
2816 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2817 is verified:
56cf8686
SP
2818
2819 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2820
2821static void
b8698a0f
L
2822analyze_miv_subscript (tree chrec_a,
2823 tree chrec_b,
2824 conflict_function **overlaps_a,
2825 conflict_function **overlaps_b,
da9a21f4
SP
2826 tree *last_conflicts,
2827 struct loop *loop_nest)
56cf8686 2828{
33b30201
SP
2829 tree type, difference;
2830
0ff4040e 2831 dependence_stats.num_miv++;
56cf8686
SP
2832 if (dump_file && (dump_flags & TDF_DETAILS))
2833 fprintf (dump_file, "(analyze_miv_subscript \n");
e2157b49 2834
33b30201 2835 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
726a989a
RB
2836 chrec_a = chrec_convert (type, chrec_a, NULL);
2837 chrec_b = chrec_convert (type, chrec_b, NULL);
33b30201 2838 difference = chrec_fold_minus (type, chrec_a, chrec_b);
b8698a0f 2839
e2157b49 2840 if (eq_evolutions_p (chrec_a, chrec_b))
56cf8686
SP
2841 {
2842 /* Access functions are the same: all the elements are accessed
2843 in the same order. */
d93817c4
ZD
2844 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2845 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
b4a9343c 2846 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
0ff4040e 2847 dependence_stats.num_miv_dependent++;
56cf8686 2848 }
b8698a0f 2849
56cf8686
SP
2850 else if (evolution_function_is_constant_p (difference)
2851 /* For the moment, the following is verified:
da9a21f4
SP
2852 evolution_function_is_affine_multivariate_p (chrec_a,
2853 loop_nest->num) */
55a700ac 2854 && !gcd_of_steps_may_divide_p (chrec_a, difference))
56cf8686
SP
2855 {
2856 /* testsuite/.../ssa-chrec-33.c
b8698a0f
L
2857 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2858
55a700ac
ZD
2859 The difference is 1, and all the evolution steps are multiples
2860 of 2, consequently there are no overlapping elements. */
d93817c4
ZD
2861 *overlaps_a = conflict_fn_no_dependence ();
2862 *overlaps_b = conflict_fn_no_dependence ();
86df10e3 2863 *last_conflicts = integer_zero_node;
0ff4040e 2864 dependence_stats.num_miv_independent++;
56cf8686 2865 }
b8698a0f 2866
da9a21f4 2867 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
0ff4040e 2868 && !chrec_contains_symbols (chrec_a)
da9a21f4 2869 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
0ff4040e 2870 && !chrec_contains_symbols (chrec_b))
56cf8686
SP
2871 {
2872 /* testsuite/.../ssa-chrec-35.c
2873 {0, +, 1}_2 vs. {0, +, 1}_3
2874 the overlapping elements are respectively located at iterations:
b8698a0f
L
2875 {0, +, 1}_x and {0, +, 1}_x,
2876 in other words, we have the equality:
86df10e3 2877 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
b8698a0f
L
2878
2879 Other examples:
2880 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
86df10e3
SP
2881 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2882
b8698a0f 2883 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
86df10e3 2884 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
56cf8686 2885 */
b8698a0f 2886 analyze_subscript_affine_affine (chrec_a, chrec_b,
86df10e3 2887 overlaps_a, overlaps_b, last_conflicts);
0ff4040e 2888
d93817c4
ZD
2889 if (CF_NOT_KNOWN_P (*overlaps_a)
2890 || CF_NOT_KNOWN_P (*overlaps_b))
0ff4040e 2891 dependence_stats.num_miv_unimplemented++;
d93817c4
ZD
2892 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2893 || CF_NO_DEPENDENCE_P (*overlaps_b))
0ff4040e
SP
2894 dependence_stats.num_miv_independent++;
2895 else
2896 dependence_stats.num_miv_dependent++;
56cf8686 2897 }
b8698a0f 2898
56cf8686
SP
2899 else
2900 {
2901 /* When the analysis is too difficult, answer "don't know". */
0ff4040e
SP
2902 if (dump_file && (dump_flags & TDF_DETAILS))
2903 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2904
d93817c4
ZD
2905 *overlaps_a = conflict_fn_not_known ();
2906 *overlaps_b = conflict_fn_not_known ();
86df10e3 2907 *last_conflicts = chrec_dont_know;
0ff4040e 2908 dependence_stats.num_miv_unimplemented++;
56cf8686 2909 }
b8698a0f 2910
56cf8686
SP
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2912 fprintf (dump_file, ")\n");
2913}
2914
da9a21f4
SP
2915/* Determines the iterations for which CHREC_A is equal to CHREC_B in
2916 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2917 OVERLAP_ITERATIONS_B are initialized with two functions that
2918 describe the iterations that contain conflicting elements.
b8698a0f 2919
56cf8686 2920 Remark: For an integer k >= 0, the following equality is true:
b8698a0f 2921
56cf8686
SP
2922 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2923*/
2924
b8698a0f
L
2925static void
2926analyze_overlapping_iterations (tree chrec_a,
2927 tree chrec_b,
2928 conflict_function **overlap_iterations_a,
2929 conflict_function **overlap_iterations_b,
da9a21f4 2930 tree *last_conflicts, struct loop *loop_nest)
56cf8686 2931{
da9a21f4
SP
2932 unsigned int lnn = loop_nest->num;
2933
0ff4040e 2934 dependence_stats.num_subscript_tests++;
b8698a0f 2935
56cf8686
SP
2936 if (dump_file && (dump_flags & TDF_DETAILS))
2937 {
2938 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2939 fprintf (dump_file, " (chrec_a = ");
2940 print_generic_expr (dump_file, chrec_a, 0);
0ff4040e 2941 fprintf (dump_file, ")\n (chrec_b = ");
56cf8686
SP
2942 print_generic_expr (dump_file, chrec_b, 0);
2943 fprintf (dump_file, ")\n");
2944 }
0ff4040e 2945
56cf8686
SP
2946 if (chrec_a == NULL_TREE
2947 || chrec_b == NULL_TREE
2948 || chrec_contains_undetermined (chrec_a)
0ff4040e 2949 || chrec_contains_undetermined (chrec_b))
56cf8686 2950 {
0ff4040e 2951 dependence_stats.num_subscript_undetermined++;
b8698a0f 2952
d93817c4
ZD
2953 *overlap_iterations_a = conflict_fn_not_known ();
2954 *overlap_iterations_b = conflict_fn_not_known ();
56cf8686 2955 }
0ff4040e 2956
b8698a0f 2957 /* If they are the same chrec, and are affine, they overlap
0ff4040e
SP
2958 on every iteration. */
2959 else if (eq_evolutions_p (chrec_a, chrec_b)
3e6f8b56
SP
2960 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2961 || operand_equal_p (chrec_a, chrec_b, 0)))
0ff4040e
SP
2962 {
2963 dependence_stats.num_same_subscript_function++;
d93817c4
ZD
2964 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2965 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
0ff4040e
SP
2966 *last_conflicts = chrec_dont_know;
2967 }
2968
2969 /* If they aren't the same, and aren't affine, we can't do anything
3e6f8b56 2970 yet. */
b8698a0f 2971 else if ((chrec_contains_symbols (chrec_a)
0ff4040e 2972 || chrec_contains_symbols (chrec_b))
da9a21f4
SP
2973 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2974 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
0ff4040e
SP
2975 {
2976 dependence_stats.num_subscript_undetermined++;
d93817c4
ZD
2977 *overlap_iterations_a = conflict_fn_not_known ();
2978 *overlap_iterations_b = conflict_fn_not_known ();
0ff4040e
SP
2979 }
2980
56cf8686 2981 else if (ziv_subscript_p (chrec_a, chrec_b))
b8698a0f 2982 analyze_ziv_subscript (chrec_a, chrec_b,
86df10e3
SP
2983 overlap_iterations_a, overlap_iterations_b,
2984 last_conflicts);
b8698a0f 2985
56cf8686 2986 else if (siv_subscript_p (chrec_a, chrec_b))
b8698a0f
L
2987 analyze_siv_subscript (chrec_a, chrec_b,
2988 overlap_iterations_a, overlap_iterations_b,
5b78fc3e 2989 last_conflicts, lnn);
b8698a0f 2990
56cf8686 2991 else
b8698a0f 2992 analyze_miv_subscript (chrec_a, chrec_b,
86df10e3 2993 overlap_iterations_a, overlap_iterations_b,
da9a21f4 2994 last_conflicts, loop_nest);
b8698a0f 2995
56cf8686
SP
2996 if (dump_file && (dump_flags & TDF_DETAILS))
2997 {
2998 fprintf (dump_file, " (overlap_iterations_a = ");
d93817c4 2999 dump_conflict_function (dump_file, *overlap_iterations_a);
56cf8686 3000 fprintf (dump_file, ")\n (overlap_iterations_b = ");
d93817c4 3001 dump_conflict_function (dump_file, *overlap_iterations_b);
56cf8686 3002 fprintf (dump_file, ")\n");
0ff4040e 3003 fprintf (dump_file, ")\n");
56cf8686
SP
3004 }
3005}
3006
ba42e045 3007/* Helper function for uniquely inserting distance vectors. */
56cf8686 3008
ba42e045
SP
3009static void
3010save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3011{
3012 unsigned i;
3013 lambda_vector v;
56cf8686 3014
ac47786e 3015 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
ba42e045
SP
3016 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3017 return;
56cf8686 3018
ba42e045
SP
3019 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
3020}
56cf8686 3021
ba42e045
SP
3022/* Helper function for uniquely inserting direction vectors. */
3023
3024static void
3025save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
56cf8686
SP
3026{
3027 unsigned i;
ba42e045 3028 lambda_vector v;
0ff4040e 3029
ac47786e 3030 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
ba42e045
SP
3031 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3032 return;
3033
3034 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
3035}
3036
3037/* Add a distance of 1 on all the loops outer than INDEX. If we
3038 haven't yet determined a distance for this outer loop, push a new
3039 distance vector composed of the previous distance, and a distance
3040 of 1 for this outer loop. Example:
3041
3042 | loop_1
3043 | loop_2
3044 | A[10]
3045 | endloop_2
3046 | endloop_1
3047
3048 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3049 save (0, 1), then we have to save (1, 0). */
3050
3051static void
3052add_outer_distances (struct data_dependence_relation *ddr,
3053 lambda_vector dist_v, int index)
3054{
3055 /* For each outer loop where init_v is not set, the accesses are
3056 in dependence of distance 1 in the loop. */
3057 while (--index >= 0)
3058 {
3059 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3060 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3061 save_v[index] = 1;
3062 save_dist_v (ddr, save_v);
3063 }
3064}
3065
3066/* Return false when fail to represent the data dependence as a
3067 distance vector. INIT_B is set to true when a component has been
3068 added to the distance vector DIST_V. INDEX_CARRY is then set to
3069 the index in DIST_V that carries the dependence. */
3070
3071static bool
3072build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3073 struct data_reference *ddr_a,
3074 struct data_reference *ddr_b,
3075 lambda_vector dist_v, bool *init_b,
3076 int *index_carry)
3077{
3078 unsigned i;
3079 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
0ff4040e 3080
36d59cf7 3081 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
56cf8686 3082 {
86df10e3 3083 tree access_fn_a, access_fn_b;
36d59cf7 3084 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
56cf8686
SP
3085
3086 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
86df10e3
SP
3087 {
3088 non_affine_dependence_relation (ddr);
ba42e045 3089 return false;
86df10e3
SP
3090 }
3091
ba42e045
SP
3092 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3093 access_fn_b = DR_ACCESS_FN (ddr_b, i);
56cf8686 3094
b8698a0f 3095 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
86df10e3 3096 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
56cf8686 3097 {
ba42e045 3098 int dist, index;
a130584a
SP
3099 int var_a = CHREC_VARIABLE (access_fn_a);
3100 int var_b = CHREC_VARIABLE (access_fn_b);
ba42e045 3101
a130584a
SP
3102 if (var_a != var_b
3103 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
86df10e3
SP
3104 {
3105 non_affine_dependence_relation (ddr);
ba42e045 3106 return false;
86df10e3 3107 }
b8698a0f 3108
6b6fa4e9 3109 dist = int_cst_value (SUB_DISTANCE (subscript));
a130584a
SP
3110 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3111 *index_carry = MIN (index, *index_carry);
56cf8686 3112
ba42e045
SP
3113 /* This is the subscript coupling test. If we have already
3114 recorded a distance for this loop (a distance coming from
3115 another subscript), it should be the same. For example,
3116 in the following code, there is no dependence:
3117
56cf8686
SP
3118 | loop i = 0, N, 1
3119 | T[i+1][i] = ...
3120 | ... = T[i][i]
3121 | endloop
ba42e045
SP
3122 */
3123 if (init_v[index] != 0 && dist_v[index] != dist)
56cf8686 3124 {
36d59cf7 3125 finalize_ddr_dependent (ddr, chrec_known);
ba42e045 3126 return false;
56cf8686
SP
3127 }
3128
ba42e045
SP
3129 dist_v[index] = dist;
3130 init_v[index] = 1;
3131 *init_b = true;
3132 }
a50411de 3133 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
ba42e045
SP
3134 {
3135 /* This can be for example an affine vs. constant dependence
3136 (T[i] vs. T[3]) that is not an affine dependence and is
3137 not representable as a distance vector. */
3138 non_affine_dependence_relation (ddr);
3139 return false;
56cf8686
SP
3140 }
3141 }
304afda6 3142
ba42e045
SP
3143 return true;
3144}
304afda6 3145
1baf2906
SP
3146/* Return true when the DDR contains only constant access functions. */
3147
3148static bool
ed7a4b4b 3149constant_access_functions (const struct data_dependence_relation *ddr)
1baf2906
SP
3150{
3151 unsigned i;
3152
3153 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3154 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3155 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3156 return false;
3157
3158 return true;
3159}
3160
ba42e045 3161/* Helper function for the case where DDR_A and DDR_B are the same
097392de
SP
3162 multivariate access function with a constant step. For an example
3163 see pr34635-1.c. */
86df10e3 3164
ba42e045
SP
3165static void
3166add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3167{
3168 int x_1, x_2;
3169 tree c_1 = CHREC_LEFT (c_2);
3170 tree c_0 = CHREC_LEFT (c_1);
3171 lambda_vector dist_v;
0ca2faee 3172 int v1, v2, cd;
86df10e3 3173
b1e75954
SP
3174 /* Polynomials with more than 2 variables are not handled yet. When
3175 the evolution steps are parameters, it is not possible to
3176 represent the dependence using classical distance vectors. */
3177 if (TREE_CODE (c_0) != INTEGER_CST
3178 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3179 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3180 {
3181 DDR_AFFINE_P (ddr) = false;
ba42e045
SP
3182 return;
3183 }
304afda6 3184
ba42e045
SP
3185 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3186 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
304afda6 3187
ba42e045
SP
3188 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3189 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
6b6fa4e9
SP
3190 v1 = int_cst_value (CHREC_RIGHT (c_1));
3191 v2 = int_cst_value (CHREC_RIGHT (c_2));
0ca2faee
ZD
3192 cd = gcd (v1, v2);
3193 v1 /= cd;
3194 v2 /= cd;
3195
3196 if (v2 < 0)
3197 {
3198 v2 = -v2;
3199 v1 = -v1;
3200 }
3201
3202 dist_v[x_1] = v2;
3203 dist_v[x_2] = -v1;
ba42e045 3204 save_dist_v (ddr, dist_v);
304afda6 3205
ba42e045
SP
3206 add_outer_distances (ddr, dist_v, x_1);
3207}
304afda6 3208
ba42e045
SP
3209/* Helper function for the case where DDR_A and DDR_B are the same
3210 access functions. */
37b8a73b 3211
ba42e045
SP
3212static void
3213add_other_self_distances (struct data_dependence_relation *ddr)
3214{
3215 lambda_vector dist_v;
3216 unsigned i;
3217 int index_carry = DDR_NB_LOOPS (ddr);
304afda6 3218
ba42e045 3219 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
37b8a73b 3220 {
ba42e045 3221 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
304afda6 3222
ba42e045 3223 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
304afda6 3224 {
ba42e045
SP
3225 if (!evolution_function_is_univariate_p (access_fun))
3226 {
3227 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3228 {
3229 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3230 return;
3231 }
3232
097392de
SP
3233 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3234
3235 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3236 add_multivariate_self_dist (ddr, access_fun);
3237 else
3238 /* The evolution step is not constant: it varies in
3239 the outer loop, so this cannot be represented by a
3240 distance vector. For example in pr34635.c the
3241 evolution is {0, +, {0, +, 4}_1}_2. */
3242 DDR_AFFINE_P (ddr) = false;
3243
ba42e045
SP
3244 return;
3245 }
3246
3247 index_carry = MIN (index_carry,
3248 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3249 DDR_LOOP_NEST (ddr)));
304afda6 3250 }
37b8a73b
SP
3251 }
3252
ba42e045
SP
3253 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3254 add_outer_distances (ddr, dist_v, index_carry);
56cf8686
SP
3255}
3256
1baf2906
SP
3257static void
3258insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3259{
3260 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3261
3262 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3263 save_dist_v (ddr, dist_v);
3264}
3265
3266/* Adds a unit distance vector to DDR when there is a 0 overlap. This
3267 is the case for example when access functions are the same and
3268 equal to a constant, as in:
3269
3270 | loop_1
3271 | A[3] = ...
3272 | ... = A[3]
3273 | endloop_1
3274
3275 in which case the distance vectors are (0) and (1). */
3276
3277static void
3278add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3279{
3280 unsigned i, j;
3281
3282 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3283 {
3284 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3285 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3286 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3287
3288 for (j = 0; j < ca->n; j++)
3289 if (affine_function_zero_p (ca->fns[j]))
3290 {
3291 insert_innermost_unit_dist_vector (ddr);
3292 return;
3293 }
3294
3295 for (j = 0; j < cb->n; j++)
3296 if (affine_function_zero_p (cb->fns[j]))
3297 {
3298 insert_innermost_unit_dist_vector (ddr);
3299 return;
3300 }
3301 }
3302}
3303
ba42e045
SP
3304/* Compute the classic per loop distance vector. DDR is the data
3305 dependence relation to build a vector from. Return false when fail
3306 to represent the data dependence as a distance vector. */
56cf8686 3307
464f49d8 3308static bool
da9a21f4
SP
3309build_classic_dist_vector (struct data_dependence_relation *ddr,
3310 struct loop *loop_nest)
56cf8686 3311{
304afda6 3312 bool init_b = false;
ba42e045
SP
3313 int index_carry = DDR_NB_LOOPS (ddr);
3314 lambda_vector dist_v;
304afda6 3315
36d59cf7 3316 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2f470326 3317 return false;
ba42e045
SP
3318
3319 if (same_access_functions (ddr))
56cf8686 3320 {
ba42e045
SP
3321 /* Save the 0 vector. */
3322 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3323 save_dist_v (ddr, dist_v);
56cf8686 3324
1baf2906
SP
3325 if (constant_access_functions (ddr))
3326 add_distance_for_zero_overlaps (ddr);
3327
ba42e045
SP
3328 if (DDR_NB_LOOPS (ddr) > 1)
3329 add_other_self_distances (ddr);
86df10e3 3330
ba42e045
SP
3331 return true;
3332 }
86df10e3 3333
ba42e045
SP
3334 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3335 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3336 dist_v, &init_b, &index_carry))
3337 return false;
86df10e3 3338
ba42e045
SP
3339 /* Save the distance vector if we initialized one. */
3340 if (init_b)
3341 {
3342 /* Verify a basic constraint: classic distance vectors should
3343 always be lexicographically positive.
3344
3345 Data references are collected in the order of execution of
3346 the program, thus for the following loop
3347
3348 | for (i = 1; i < 100; i++)
3349 | for (j = 1; j < 100; j++)
3350 | {
3351 | t = T[j+1][i-1]; // A
3352 | T[j][i] = t + 2; // B
3353 | }
3354
3355 references are collected following the direction of the wind:
3356 A then B. The data dependence tests are performed also
3357 following this order, such that we're looking at the distance
3358 separating the elements accessed by A from the elements later
3359 accessed by B. But in this example, the distance returned by
3360 test_dep (A, B) is lexicographically negative (-1, 1), that
3361 means that the access A occurs later than B with respect to
3362 the outer loop, ie. we're actually looking upwind. In this
3363 case we solve test_dep (B, A) looking downwind to the
3364 lexicographically positive solution, that returns the
3365 distance vector (1, -1). */
3366 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3367 {
3368 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2f470326
JJ
3369 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3370 loop_nest))
3371 return false;
ba42e045 3372 compute_subscript_distance (ddr);
2f470326
JJ
3373 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3374 save_v, &init_b, &index_carry))
3375 return false;
ba42e045 3376 save_dist_v (ddr, save_v);
71d5b5e1 3377 DDR_REVERSED_P (ddr) = true;
ba42e045
SP
3378
3379 /* In this case there is a dependence forward for all the
3380 outer loops:
3381
3382 | for (k = 1; k < 100; k++)
3383 | for (i = 1; i < 100; i++)
3384 | for (j = 1; j < 100; j++)
3385 | {
3386 | t = T[j+1][i-1]; // A
3387 | T[j][i] = t + 2; // B
3388 | }
3389
b8698a0f 3390 the vectors are:
ba42e045
SP
3391 (0, 1, -1)
3392 (1, 1, -1)
3393 (1, -1, 1)
3394 */
3395 if (DDR_NB_LOOPS (ddr) > 1)
3396 {
3397 add_outer_distances (ddr, save_v, index_carry);
3398 add_outer_distances (ddr, dist_v, index_carry);
86df10e3 3399 }
ba42e045
SP
3400 }
3401 else
3402 {
3403 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3404 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
86df10e3 3405
ba42e045 3406 if (DDR_NB_LOOPS (ddr) > 1)
56cf8686 3407 {
ba42e045 3408 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
86df10e3 3409
2f470326
JJ
3410 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3411 DDR_A (ddr), loop_nest))
3412 return false;
ba42e045 3413 compute_subscript_distance (ddr);
2f470326
JJ
3414 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3415 opposite_v, &init_b,
3416 &index_carry))
3417 return false;
86df10e3 3418
2f470326 3419 save_dist_v (ddr, save_v);
ba42e045
SP
3420 add_outer_distances (ddr, dist_v, index_carry);
3421 add_outer_distances (ddr, opposite_v, index_carry);
56cf8686 3422 }
2f470326
JJ
3423 else
3424 save_dist_v (ddr, save_v);
56cf8686
SP
3425 }
3426 }
ba42e045
SP
3427 else
3428 {
3429 /* There is a distance of 1 on all the outer loops: Example:
3430 there is a dependence of distance 1 on loop_1 for the array A.
304afda6 3431
ba42e045
SP
3432 | loop_1
3433 | A[5] = ...
3434 | endloop
3435 */
3436 add_outer_distances (ddr, dist_v,
3437 lambda_vector_first_nz (dist_v,
3438 DDR_NB_LOOPS (ddr), 0));
3439 }
3440
3441 if (dump_file && (dump_flags & TDF_DETAILS))
304afda6 3442 {
ba42e045 3443 unsigned i;
304afda6 3444
ba42e045
SP
3445 fprintf (dump_file, "(build_classic_dist_vector\n");
3446 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3447 {
3448 fprintf (dump_file, " dist_vector = (");
3449 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3450 DDR_NB_LOOPS (ddr));
3451 fprintf (dump_file, " )\n");
3452 }
3453 fprintf (dump_file, ")\n");
304afda6
SP
3454 }
3455
ba42e045
SP
3456 return true;
3457}
56cf8686 3458
ba42e045
SP
3459/* Return the direction for a given distance.
3460 FIXME: Computing dir this way is suboptimal, since dir can catch
3461 cases that dist is unable to represent. */
86df10e3 3462
ba42e045
SP
3463static inline enum data_dependence_direction
3464dir_from_dist (int dist)
3465{
3466 if (dist > 0)
3467 return dir_positive;
3468 else if (dist < 0)
3469 return dir_negative;
3470 else
3471 return dir_equal;
3472}
304afda6 3473
ba42e045
SP
3474/* Compute the classic per loop direction vector. DDR is the data
3475 dependence relation to build a vector from. */
304afda6 3476
ba42e045
SP
3477static void
3478build_classic_dir_vector (struct data_dependence_relation *ddr)
3479{
3480 unsigned i, j;
3481 lambda_vector dist_v;
86df10e3 3482
ac47786e 3483 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
ba42e045
SP
3484 {
3485 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
86df10e3 3486
ba42e045
SP
3487 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3488 dir_v[j] = dir_from_dist (dist_v[j]);
304afda6 3489
ba42e045
SP
3490 save_dir_v (ddr, dir_v);
3491 }
56cf8686
SP
3492}
3493
ba42e045
SP
3494/* Helper function. Returns true when there is a dependence between
3495 data references DRA and DRB. */
0ff4040e 3496
ba42e045
SP
3497static bool
3498subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3499 struct data_reference *dra,
da9a21f4
SP
3500 struct data_reference *drb,
3501 struct loop *loop_nest)
0ff4040e
SP
3502{
3503 unsigned int i;
0ff4040e 3504 tree last_conflicts;
ebf78a47 3505 struct subscript *subscript;
9b00587c 3506 tree res = NULL_TREE;
ba42e045 3507
ebf78a47
SP
3508 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3509 i++)
0ff4040e 3510 {
d93817c4 3511 conflict_function *overlaps_a, *overlaps_b;
ebf78a47 3512
b8698a0f 3513 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
0ff4040e 3514 DR_ACCESS_FN (drb, i),
b8698a0f 3515 &overlaps_a, &overlaps_b,
da9a21f4 3516 &last_conflicts, loop_nest);
ebf78a47 3517
9b00587c
RG
3518 if (SUB_CONFLICTS_IN_A (subscript))
3519 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3520 if (SUB_CONFLICTS_IN_B (subscript))
3521 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3522
3523 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3524 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3525 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3526
3527 /* If there is any undetermined conflict function we have to
3528 give a conservative answer in case we cannot prove that
3529 no dependence exists when analyzing another subscript. */
d93817c4
ZD
3530 if (CF_NOT_KNOWN_P (overlaps_a)
3531 || CF_NOT_KNOWN_P (overlaps_b))
0ff4040e 3532 {
9b00587c
RG
3533 res = chrec_dont_know;
3534 continue;
0ff4040e 3535 }
ebf78a47 3536
9b00587c 3537 /* When there is a subscript with no dependence we can stop. */
d93817c4
ZD
3538 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3539 || CF_NO_DEPENDENCE_P (overlaps_b))
0ff4040e 3540 {
9b00587c
RG
3541 res = chrec_known;
3542 break;
0ff4040e
SP
3543 }
3544 }
3545
9b00587c
RG
3546 if (res == NULL_TREE)
3547 return true;
3548
3549 if (res == chrec_known)
3550 dependence_stats.num_dependence_independent++;
3551 else
3552 dependence_stats.num_dependence_undetermined++;
3553 finalize_ddr_dependent (ddr, res);
3554 return false;
ba42e045
SP
3555}
3556
da9a21f4 3557/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
ba42e045
SP
3558
3559static void
da9a21f4
SP
3560subscript_dependence_tester (struct data_dependence_relation *ddr,
3561 struct loop *loop_nest)
ba42e045 3562{
b8698a0f 3563
ba42e045
SP
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3565 fprintf (dump_file, "(subscript_dependence_tester \n");
b8698a0f 3566
da9a21f4 3567 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
ba42e045 3568 dependence_stats.num_dependence_dependent++;
0ff4040e 3569
0ff4040e 3570 compute_subscript_distance (ddr);
da9a21f4 3571 if (build_classic_dist_vector (ddr, loop_nest))
ba42e045 3572 build_classic_dir_vector (ddr);
0ff4040e
SP
3573
3574 if (dump_file && (dump_flags & TDF_DETAILS))
3575 fprintf (dump_file, ")\n");
3576}
3577
56cf8686 3578/* Returns true when all the access functions of A are affine or
da9a21f4 3579 constant with respect to LOOP_NEST. */
56cf8686 3580
b8698a0f 3581static bool
ed7a4b4b
KG
3582access_functions_are_affine_or_constant_p (const struct data_reference *a,
3583 const struct loop *loop_nest)
56cf8686
SP
3584{
3585 unsigned int i;
3d8864c0 3586 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
9cbb7989 3587 tree t;
3d8864c0 3588
ac47786e 3589 FOR_EACH_VEC_ELT (tree, fns, i, t)
da9a21f4
SP
3590 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3591 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
56cf8686 3592 return false;
b8698a0f 3593
56cf8686
SP
3594 return true;
3595}
3596
3d8864c0
SP
3597/* Initializes an equation for an OMEGA problem using the information
3598 contained in the ACCESS_FUN. Returns true when the operation
3599 succeeded.
3600
3601 PB is the omega constraint system.
3602 EQ is the number of the equation to be initialized.
3603 OFFSET is used for shifting the variables names in the constraints:
3604 a constrain is composed of 2 * the number of variables surrounding
3605 dependence accesses. OFFSET is set either to 0 for the first n variables,
3606 then it is set to n.
3607 ACCESS_FUN is expected to be an affine chrec. */
3608
3609static bool
b8698a0f
L
3610init_omega_eq_with_af (omega_pb pb, unsigned eq,
3611 unsigned int offset, tree access_fun,
3d8864c0
SP
3612 struct data_dependence_relation *ddr)
3613{
3614 switch (TREE_CODE (access_fun))
3615 {
3616 case POLYNOMIAL_CHREC:
3617 {
3618 tree left = CHREC_LEFT (access_fun);
3619 tree right = CHREC_RIGHT (access_fun);
3620 int var = CHREC_VARIABLE (access_fun);
3621 unsigned var_idx;
3622
3623 if (TREE_CODE (right) != INTEGER_CST)
3624 return false;
3625
3626 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
6b6fa4e9 3627 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3d8864c0
SP
3628
3629 /* Compute the innermost loop index. */
3630 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3631
3632 if (offset == 0)
b8698a0f 3633 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
6b6fa4e9 3634 += int_cst_value (right);
3d8864c0
SP
3635
3636 switch (TREE_CODE (left))
3637 {
3638 case POLYNOMIAL_CHREC:
3639 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3640
3641 case INTEGER_CST:
6b6fa4e9 3642 pb->eqs[eq].coef[0] += int_cst_value (left);
3d8864c0
SP
3643 return true;
3644
3645 default:
3646 return false;
3647 }
3648 }
3649
3650 case INTEGER_CST:
6b6fa4e9 3651 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3d8864c0
SP
3652 return true;
3653
3654 default:
3655 return false;
3656 }
3657}
3658
3659/* As explained in the comments preceding init_omega_for_ddr, we have
3660 to set up a system for each loop level, setting outer loops
3661 variation to zero, and current loop variation to positive or zero.
3662 Save each lexico positive distance vector. */
3663
3664static void
3665omega_extract_distance_vectors (omega_pb pb,
3666 struct data_dependence_relation *ddr)
3667{
3668 int eq, geq;
3669 unsigned i, j;
3670 struct loop *loopi, *loopj;
3671 enum omega_result res;
3672
3673 /* Set a new problem for each loop in the nest. The basis is the
3674 problem that we have initialized until now. On top of this we
3675 add new constraints. */
b8698a0f 3676 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3d8864c0
SP
3677 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3678 {
3679 int dist = 0;
3680 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3681 DDR_NB_LOOPS (ddr));
3682
3683 omega_copy_problem (copy, pb);
3684
3685 /* For all the outer loops "loop_j", add "dj = 0". */
3686 for (j = 0;
3687 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3688 {
3689 eq = omega_add_zero_eq (copy, omega_black);
3690 copy->eqs[eq].coef[j + 1] = 1;
3691 }
3692
3693 /* For "loop_i", add "0 <= di". */
3694 geq = omega_add_zero_geq (copy, omega_black);
3695 copy->geqs[geq].coef[i + 1] = 1;
3696
3697 /* Reduce the constraint system, and test that the current
3698 problem is feasible. */
3699 res = omega_simplify_problem (copy);
b8698a0f 3700 if (res == omega_false
3d8864c0
SP
3701 || res == omega_unknown
3702 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3703 goto next_problem;
3704
3705 for (eq = 0; eq < copy->num_subs; eq++)
3706 if (copy->subs[eq].key == (int) i + 1)
3707 {
3708 dist = copy->subs[eq].coef[0];
3709 goto found_dist;
3710 }
3711
3712 if (dist == 0)
3713 {
3714 /* Reinitialize problem... */
3715 omega_copy_problem (copy, pb);
3716 for (j = 0;
3717 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3718 {
3719 eq = omega_add_zero_eq (copy, omega_black);
3720 copy->eqs[eq].coef[j + 1] = 1;
3721 }
3722
3723 /* ..., but this time "di = 1". */
3724 eq = omega_add_zero_eq (copy, omega_black);
3725 copy->eqs[eq].coef[i + 1] = 1;
3726 copy->eqs[eq].coef[0] = -1;
3727
3728 res = omega_simplify_problem (copy);
b8698a0f 3729 if (res == omega_false
3d8864c0
SP
3730 || res == omega_unknown
3731 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3732 goto next_problem;
3733
3734 for (eq = 0; eq < copy->num_subs; eq++)
3735 if (copy->subs[eq].key == (int) i + 1)
3736 {
3737 dist = copy->subs[eq].coef[0];
3738 goto found_dist;
3739 }
3740 }
3741
3742 found_dist:;
3743 /* Save the lexicographically positive distance vector. */
3744 if (dist >= 0)
3745 {
3746 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3747 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3748
3749 dist_v[i] = dist;
3750
3751 for (eq = 0; eq < copy->num_subs; eq++)
3752 if (copy->subs[eq].key > 0)
3753 {
3754 dist = copy->subs[eq].coef[0];
3755 dist_v[copy->subs[eq].key - 1] = dist;
3756 }
3757
3758 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3759 dir_v[j] = dir_from_dist (dist_v[j]);
3760
3761 save_dist_v (ddr, dist_v);
3762 save_dir_v (ddr, dir_v);
3763 }
3764
3765 next_problem:;
3766 omega_free_problem (copy);
3767 }
3768}
3769
3770/* This is called for each subscript of a tuple of data references:
3771 insert an equality for representing the conflicts. */
3772
3773static bool
3774omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3775 struct data_dependence_relation *ddr,
3776 omega_pb pb, bool *maybe_dependent)
3777{
3778 int eq;
33b30201
SP
3779 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3780 TREE_TYPE (access_fun_b));
726a989a
RB
3781 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3782 tree fun_b = chrec_convert (type, access_fun_b, NULL);
33b30201 3783 tree difference = chrec_fold_minus (type, fun_a, fun_b);
5a1f5f9a 3784 tree minus_one;
3d8864c0
SP
3785
3786 /* When the fun_a - fun_b is not constant, the dependence is not
3787 captured by the classic distance vector representation. */
3788 if (TREE_CODE (difference) != INTEGER_CST)
3789 return false;
3790
3791 /* ZIV test. */
3792 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3793 {
3794 /* There is no dependence. */
3795 *maybe_dependent = false;
3796 return true;
3797 }
3798
5a1f5f9a
SP
3799 minus_one = build_int_cst (type, -1);
3800 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3d8864c0
SP
3801
3802 eq = omega_add_zero_eq (pb, omega_black);
3803 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3804 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3805 /* There is probably a dependence, but the system of
3806 constraints cannot be built: answer "don't know". */
3807 return false;
3808
3809 /* GCD test. */
3810 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
b8698a0f 3811 && !int_divides_p (lambda_vector_gcd
3d8864c0
SP
3812 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3813 2 * DDR_NB_LOOPS (ddr)),
3814 pb->eqs[eq].coef[0]))
3815 {
3816 /* There is no dependence. */
3817 *maybe_dependent = false;
3818 return true;
3819 }
3820
3821 return true;
3822}
3823
3824/* Helper function, same as init_omega_for_ddr but specialized for
3825 data references A and B. */
3826
3827static bool
3828init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3829 struct data_dependence_relation *ddr,
3830 omega_pb pb, bool *maybe_dependent)
3831{
3832 unsigned i;
3833 int ineq;
3834 struct loop *loopi;
3835 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3836
3837 /* Insert an equality per subscript. */
3838 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3839 {
3840 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3841 ddr, pb, maybe_dependent))
3842 return false;
3843 else if (*maybe_dependent == false)
3844 {
3845 /* There is no dependence. */
3846 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3847 return true;
3848 }
3849 }
3850
3851 /* Insert inequalities: constraints corresponding to the iteration
3852 domain, i.e. the loops surrounding the references "loop_x" and
3853 the distance variables "dx". The layout of the OMEGA
3854 representation is as follows:
3855 - coef[0] is the constant
3856 - coef[1..nb_loops] are the protected variables that will not be
3857 removed by the solver: the "dx"
3858 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3859 */
b8698a0f 3860 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3d8864c0
SP
3861 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3862 {
652c4c71 3863 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3d8864c0
SP
3864
3865 /* 0 <= loop_x */
3866 ineq = omega_add_zero_geq (pb, omega_black);
3867 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3868
3869 /* 0 <= loop_x + dx */
3870 ineq = omega_add_zero_geq (pb, omega_black);
3871 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3872 pb->geqs[ineq].coef[i + 1] = 1;
3873
3874 if (nbi != -1)
3875 {
3876 /* loop_x <= nb_iters */
3877 ineq = omega_add_zero_geq (pb, omega_black);
3878 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3879 pb->geqs[ineq].coef[0] = nbi;
3880
3881 /* loop_x + dx <= nb_iters */
3882 ineq = omega_add_zero_geq (pb, omega_black);
3883 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3884 pb->geqs[ineq].coef[i + 1] = -1;
3885 pb->geqs[ineq].coef[0] = nbi;
3886
3887 /* A step "dx" bigger than nb_iters is not feasible, so
3888 add "0 <= nb_iters + dx", */
3889 ineq = omega_add_zero_geq (pb, omega_black);
3890 pb->geqs[ineq].coef[i + 1] = 1;
3891 pb->geqs[ineq].coef[0] = nbi;
3892 /* and "dx <= nb_iters". */
3893 ineq = omega_add_zero_geq (pb, omega_black);
3894 pb->geqs[ineq].coef[i + 1] = -1;
3895 pb->geqs[ineq].coef[0] = nbi;
3896 }
3897 }
3898
3899 omega_extract_distance_vectors (pb, ddr);
3900
3901 return true;
3902}
3903
3904/* Sets up the Omega dependence problem for the data dependence
3905 relation DDR. Returns false when the constraint system cannot be
3906 built, ie. when the test answers "don't know". Returns true
3907 otherwise, and when independence has been proved (using one of the
3908 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3909 set MAYBE_DEPENDENT to true.
3910
3911 Example: for setting up the dependence system corresponding to the
b8698a0f 3912 conflicting accesses
3d8864c0
SP
3913
3914 | loop_i
3915 | loop_j
3916 | A[i, i+1] = ...
3917 | ... A[2*j, 2*(i + j)]
3918 | endloop_j
3919 | endloop_i
b8698a0f 3920
3d8864c0
SP
3921 the following constraints come from the iteration domain:
3922
3923 0 <= i <= Ni
3924 0 <= i + di <= Ni
3925 0 <= j <= Nj
3926 0 <= j + dj <= Nj
3927
3928 where di, dj are the distance variables. The constraints
3929 representing the conflicting elements are:
3930
3931 i = 2 * (j + dj)
3932 i + 1 = 2 * (i + di + j + dj)
3933
3934 For asking that the resulting distance vector (di, dj) be
3935 lexicographically positive, we insert the constraint "di >= 0". If
3936 "di = 0" in the solution, we fix that component to zero, and we
3937 look at the inner loops: we set a new problem where all the outer
3938 loop distances are zero, and fix this inner component to be
3939 positive. When one of the components is positive, we save that
3940 distance, and set a new problem where the distance on this loop is
3941 zero, searching for other distances in the inner loops. Here is
3942 the classic example that illustrates that we have to set for each
3943 inner loop a new problem:
3944
3945 | loop_1
3946 | loop_2
3947 | A[10]
3948 | endloop_2
3949 | endloop_1
3950
3951 we have to save two distances (1, 0) and (0, 1).
3952
3953 Given two array references, refA and refB, we have to set the
3954 dependence problem twice, refA vs. refB and refB vs. refA, and we
3955 cannot do a single test, as refB might occur before refA in the
3956 inner loops, and the contrary when considering outer loops: ex.
3957
3958 | loop_0
3959 | loop_1
3960 | loop_2
3961 | T[{1,+,1}_2][{1,+,1}_1] // refA
3962 | T[{2,+,1}_2][{0,+,1}_1] // refB
3963 | endloop_2
3964 | endloop_1
3965 | endloop_0
3966
3967 refB touches the elements in T before refA, and thus for the same
3968 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3969 but for successive loop_0 iterations, we have (1, -1, 1)
3970
3971 The Omega solver expects the distance variables ("di" in the
3972 previous example) to come first in the constraint system (as
3973 variables to be protected, or "safe" variables), the constraint
3974 system is built using the following layout:
3975
3976 "cst | distance vars | index vars".
3977*/
3978
3979static bool
3980init_omega_for_ddr (struct data_dependence_relation *ddr,
3981 bool *maybe_dependent)
3982{
3983 omega_pb pb;
3984 bool res = false;
3985
3986 *maybe_dependent = true;
3987
3988 if (same_access_functions (ddr))
3989 {
3990 unsigned j;
3991 lambda_vector dir_v;
3992
3993 /* Save the 0 vector. */
3994 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3995 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3996 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3997 dir_v[j] = dir_equal;
3998 save_dir_v (ddr, dir_v);
3999
4000 /* Save the dependences carried by outer loops. */
4001 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4002 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4003 maybe_dependent);
4004 omega_free_problem (pb);
4005 return res;
4006 }
4007
4008 /* Omega expects the protected variables (those that have to be kept
4009 after elimination) to appear first in the constraint system.
4010 These variables are the distance variables. In the following
4011 initialization we declare NB_LOOPS safe variables, and the total
4012 number of variables for the constraint system is 2*NB_LOOPS. */
4013 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4014 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4015 maybe_dependent);
4016 omega_free_problem (pb);
4017
4018 /* Stop computation if not decidable, or no dependence. */
4019 if (res == false || *maybe_dependent == false)
4020 return res;
4021
4022 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4023 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4024 maybe_dependent);
4025 omega_free_problem (pb);
4026
4027 return res;
4028}
4029
4030/* Return true when DDR contains the same information as that stored
4031 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4032
4033static bool
4034ddr_consistent_p (FILE *file,
4035 struct data_dependence_relation *ddr,
4036 VEC (lambda_vector, heap) *dist_vects,
4037 VEC (lambda_vector, heap) *dir_vects)
4038{
4039 unsigned int i, j;
4040
4041 /* If dump_file is set, output there. */
4042 if (dump_file && (dump_flags & TDF_DETAILS))
4043 file = dump_file;
4044
4045 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
4046 {
4047 lambda_vector b_dist_v;
4048 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4049 VEC_length (lambda_vector, dist_vects),
4050 DDR_NUM_DIST_VECTS (ddr));
4051
4052 fprintf (file, "Banerjee dist vectors:\n");
ac47786e 4053 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3d8864c0
SP
4054 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4055
4056 fprintf (file, "Omega dist vectors:\n");
4057 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4058 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4059
4060 fprintf (file, "data dependence relation:\n");
4061 dump_data_dependence_relation (file, ddr);
4062
4063 fprintf (file, ")\n");
4064 return false;
4065 }
4066
4067 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
4068 {
4069 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4070 VEC_length (lambda_vector, dir_vects),
4071 DDR_NUM_DIR_VECTS (ddr));
4072 return false;
4073 }
4074
4075 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4076 {
4077 lambda_vector a_dist_v;
4078 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4079
4080 /* Distance vectors are not ordered in the same way in the DDR
4081 and in the DIST_VECTS: search for a matching vector. */
ac47786e 4082 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3d8864c0
SP
4083 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4084 break;
4085
4086 if (j == VEC_length (lambda_vector, dist_vects))
4087 {
4088 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4089 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4090 fprintf (file, "not found in Omega dist vectors:\n");
4091 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4092 fprintf (file, "data dependence relation:\n");
4093 dump_data_dependence_relation (file, ddr);
4094 fprintf (file, ")\n");
4095 }
4096 }
4097
4098 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4099 {
4100 lambda_vector a_dir_v;
4101 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4102
4103 /* Direction vectors are not ordered in the same way in the DDR
4104 and in the DIR_VECTS: search for a matching vector. */
ac47786e 4105 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3d8864c0
SP
4106 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4107 break;
4108
4109 if (j == VEC_length (lambda_vector, dist_vects))
4110 {
4111 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4112 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4113 fprintf (file, "not found in Omega dir vectors:\n");
4114 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4115 fprintf (file, "data dependence relation:\n");
4116 dump_data_dependence_relation (file, ddr);
4117 fprintf (file, ")\n");
4118 }
4119 }
4120
b8698a0f 4121 return true;
3d8864c0
SP
4122}
4123
da9a21f4
SP
4124/* This computes the affine dependence relation between A and B with
4125 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4126 independence between two accesses, while CHREC_DONT_KNOW is used
4127 for representing the unknown relation.
b8698a0f 4128
56cf8686
SP
4129 Note that it is possible to stop the computation of the dependence
4130 relation the first time we detect a CHREC_KNOWN element for a given
4131 subscript. */
4132
0ff4040e 4133static void
da9a21f4
SP
4134compute_affine_dependence (struct data_dependence_relation *ddr,
4135 struct loop *loop_nest)
56cf8686
SP
4136{
4137 struct data_reference *dra = DDR_A (ddr);
4138 struct data_reference *drb = DDR_B (ddr);
b8698a0f 4139
56cf8686
SP
4140 if (dump_file && (dump_flags & TDF_DETAILS))
4141 {
36d59cf7 4142 fprintf (dump_file, "(compute_affine_dependence\n");
c4ddde1b
RG
4143 fprintf (dump_file, " stmt_a: ");
4144 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4145 fprintf (dump_file, " stmt_b: ");
4146 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
56cf8686 4147 }
0ff4040e 4148
56cf8686 4149 /* Analyze only when the dependence relation is not yet known. */
1a4571cb 4150 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
56cf8686 4151 {
0ff4040e
SP
4152 dependence_stats.num_dependence_tests++;
4153
da9a21f4
SP
4154 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4155 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3d8864c0
SP
4156 {
4157 if (flag_check_data_deps)
4158 {
4159 /* Compute the dependences using the first algorithm. */
da9a21f4 4160 subscript_dependence_tester (ddr, loop_nest);
3d8864c0
SP
4161
4162 if (dump_file && (dump_flags & TDF_DETAILS))
4163 {
4164 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4165 dump_data_dependence_relation (dump_file, ddr);
4166 }
4167
4168 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4169 {
4170 bool maybe_dependent;
4171 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4172
4173 /* Save the result of the first DD analyzer. */
4174 dist_vects = DDR_DIST_VECTS (ddr);
4175 dir_vects = DDR_DIR_VECTS (ddr);
4176
4177 /* Reset the information. */
4178 DDR_DIST_VECTS (ddr) = NULL;
4179 DDR_DIR_VECTS (ddr) = NULL;
4180
4181 /* Compute the same information using Omega. */
4182 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4183 goto csys_dont_know;
4184
4185 if (dump_file && (dump_flags & TDF_DETAILS))
4186 {
4187 fprintf (dump_file, "Omega Analyzer\n");
4188 dump_data_dependence_relation (dump_file, ddr);
4189 }
4190
4191 /* Check that we get the same information. */
4192 if (maybe_dependent)
4193 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4194 dir_vects));
4195 }
4196 }
4197 else
da9a21f4 4198 subscript_dependence_tester (ddr, loop_nest);
3d8864c0 4199 }
b8698a0f 4200
56cf8686
SP
4201 /* As a last case, if the dependence cannot be determined, or if
4202 the dependence is considered too difficult to determine, answer
4203 "don't know". */
4204 else
0ff4040e 4205 {
3d8864c0 4206 csys_dont_know:;
0ff4040e
SP
4207 dependence_stats.num_dependence_undetermined++;
4208
4209 if (dump_file && (dump_flags & TDF_DETAILS))
4210 {
4211 fprintf (dump_file, "Data ref a:\n");
4212 dump_data_reference (dump_file, dra);
4213 fprintf (dump_file, "Data ref b:\n");
4214 dump_data_reference (dump_file, drb);
4215 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4216 }
4217 finalize_ddr_dependent (ddr, chrec_dont_know);
4218 }
56cf8686 4219 }
b8698a0f 4220
56cf8686 4221 if (dump_file && (dump_flags & TDF_DETAILS))
c4ddde1b
RG
4222 {
4223 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4224 fprintf (dump_file, ") -> no dependence\n");
4225 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4226 fprintf (dump_file, ") -> dependence analysis failed\n");
4227 else
4228 fprintf (dump_file, ")\n");
4229 }
56cf8686
SP
4230}
4231
ba42e045
SP
4232/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4233 the data references in DATAREFS, in the LOOP_NEST. When
ebf78a47 4234 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3881dee9
AB
4235 relations. Return true when successful, i.e. data references number
4236 is small enough to be handled. */
56cf8686 4237
3881dee9 4238bool
ebf78a47 4239compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
a84481aa 4240 VEC (ddr_p, heap) **dependence_relations,
ba42e045 4241 VEC (loop_p, heap) *loop_nest,
ebf78a47 4242 bool compute_self_and_rr)
56cf8686 4243{
ebf78a47
SP
4244 struct data_dependence_relation *ddr;
4245 struct data_reference *a, *b;
4246 unsigned int i, j;
56cf8686 4247
3881dee9
AB
4248 if ((int) VEC_length (data_reference_p, datarefs)
4249 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4250 {
4251 struct data_dependence_relation *ddr;
4252
4253 /* Insert a single relation into dependence_relations:
4254 chrec_dont_know. */
4255 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4256 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4257 return false;
4258 }
4259
ac47786e 4260 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
ebf78a47 4261 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
b0af49c4 4262 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
ebf78a47
SP
4263 {
4264 ddr = initialize_data_dependence_relation (a, b, loop_nest);
a84481aa 4265 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
a70d6342
IR
4266 if (loop_nest)
4267 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
ebf78a47 4268 }
789246d7 4269
ebf78a47 4270 if (compute_self_and_rr)
ac47786e 4271 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
56cf8686 4272 {
ebf78a47 4273 ddr = initialize_data_dependence_relation (a, a, loop_nest);
a84481aa 4274 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
1a4571cb
RL
4275 if (loop_nest)
4276 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
56cf8686 4277 }
3881dee9
AB
4278
4279 return true;
56cf8686
SP
4280}
4281
aeb83f09
RG
4282/* Describes a location of a memory reference. */
4283
4284typedef struct data_ref_loc_d
4285{
4286 /* Position of the memory reference. */
4287 tree *pos;
4288
4289 /* True if the memory reference is read. */
4290 bool is_read;
4291} data_ref_loc;
4292
4293DEF_VEC_O (data_ref_loc);
4294DEF_VEC_ALLOC_O (data_ref_loc, heap);
4295
946e1bc7
ZD
4296/* Stores the locations of memory references in STMT to REFERENCES. Returns
4297 true if STMT clobbers memory, false otherwise. */
4298
aeb83f09 4299static bool
726a989a 4300get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
946e1bc7
ZD
4301{
4302 bool clobbers_memory = false;
4303 data_ref_loc *ref;
726a989a
RB
4304 tree *op0, *op1;
4305 enum gimple_code stmt_code = gimple_code (stmt);
946e1bc7
ZD
4306
4307 *references = NULL;
4308
4309 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4310 Calls have side-effects, except those to const or pure
4311 functions. */
726a989a
RB
4312 if ((stmt_code == GIMPLE_CALL
4313 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4314 || (stmt_code == GIMPLE_ASM
06e1d03d 4315 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
946e1bc7
ZD
4316 clobbers_memory = true;
4317
5006671f 4318 if (!gimple_vuse (stmt))
946e1bc7
ZD
4319 return clobbers_memory;
4320
726a989a 4321 if (stmt_code == GIMPLE_ASSIGN)
946e1bc7 4322 {
c8ae0bec 4323 tree base;
726a989a
RB
4324 op0 = gimple_assign_lhs_ptr (stmt);
4325 op1 = gimple_assign_rhs1_ptr (stmt);
b8698a0f 4326
946e1bc7 4327 if (DECL_P (*op1)
c8ae0bec
RG
4328 || (REFERENCE_CLASS_P (*op1)
4329 && (base = get_base_address (*op1))
4330 && TREE_CODE (base) != SSA_NAME))
946e1bc7
ZD
4331 {
4332 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4333 ref->pos = op1;
4334 ref->is_read = true;
4335 }
946e1bc7 4336 }
726a989a 4337 else if (stmt_code == GIMPLE_CALL)
946e1bc7 4338 {
1b14d815 4339 unsigned i, n;
ac84e05e 4340
1b14d815
RS
4341 op0 = gimple_call_lhs_ptr (stmt);
4342 n = gimple_call_num_args (stmt);
ac84e05e 4343 for (i = 0; i < n; i++)
946e1bc7 4344 {
1b14d815 4345 op1 = gimple_call_arg_ptr (stmt, i);
ac84e05e 4346
1b14d815
RS
4347 if (DECL_P (*op1)
4348 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
946e1bc7
ZD
4349 {
4350 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
1b14d815 4351 ref->pos = op1;
946e1bc7
ZD
4352 ref->is_read = true;
4353 }
4354 }
4355 }
1b14d815
RS
4356 else
4357 return clobbers_memory;
946e1bc7 4358
1b14d815
RS
4359 if (*op0
4360 && (DECL_P (*op0)
4361 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4362 {
4363 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4364 ref->pos = op0;
4365 ref->is_read = false;
4366 }
946e1bc7
ZD
4367 return clobbers_memory;
4368}
4369
4370/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3cb960c7 4371 reference, returns false, otherwise returns true. NEST is the outermost
f8bf9252 4372 loop of the loop nest in which the references should be analyzed. */
946e1bc7 4373
f8bf9252 4374bool
726a989a 4375find_data_references_in_stmt (struct loop *nest, gimple stmt,
946e1bc7
ZD
4376 VEC (data_reference_p, heap) **datarefs)
4377{
4378 unsigned i;
4379 VEC (data_ref_loc, heap) *references;
4380 data_ref_loc *ref;
4381 bool ret = true;
4382 data_reference_p dr;
4383
4384 if (get_references_in_stmt (stmt, &references))
4385 {
4386 VEC_free (data_ref_loc, heap, references);
4387 return false;
4388 }
4389
ac47786e 4390 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
946e1bc7 4391 {
5c640e29
SP
4392 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4393 *ref->pos, stmt, ref->is_read);
bbc8a8dc 4394 gcc_assert (dr != NULL);
bbc8a8dc 4395 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
946e1bc7
ZD
4396 }
4397 VEC_free (data_ref_loc, heap, references);
4398 return ret;
4399}
4400
5c640e29
SP
4401/* Stores the data references in STMT to DATAREFS. If there is an
4402 unanalyzable reference, returns false, otherwise returns true.
4403 NEST is the outermost loop of the loop nest in which the references
4404 should be instantiated, LOOP is the loop in which the references
4405 should be analyzed. */
ed91d661
SP
4406
4407bool
5c640e29 4408graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
ed91d661
SP
4409 VEC (data_reference_p, heap) **datarefs)
4410{
4411 unsigned i;
4412 VEC (data_ref_loc, heap) *references;
4413 data_ref_loc *ref;
4414 bool ret = true;
4415 data_reference_p dr;
4416
4417 if (get_references_in_stmt (stmt, &references))
4418 {
4419 VEC_free (data_ref_loc, heap, references);
4420 return false;
4421 }
4422
ac47786e 4423 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
ed91d661 4424 {
5c640e29 4425 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
ed91d661
SP
4426 gcc_assert (dr != NULL);
4427 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4428 }
4429
4430 VEC_free (data_ref_loc, heap, references);
4431 return ret;
4432}
4433
a70d6342
IR
4434/* Search the data references in LOOP, and record the information into
4435 DATAREFS. Returns chrec_dont_know when failing to analyze a
4436 difficult case, returns NULL_TREE otherwise. */
4437
bfe068c3 4438tree
a70d6342
IR
4439find_data_references_in_bb (struct loop *loop, basic_block bb,
4440 VEC (data_reference_p, heap) **datarefs)
4441{
4442 gimple_stmt_iterator bsi;
4443
4444 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4445 {
4446 gimple stmt = gsi_stmt (bsi);
4447
4448 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4449 {
4450 struct data_reference *res;
4451 res = XCNEW (struct data_reference);
4452 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4453
4454 return chrec_dont_know;
4455 }
4456 }
4457
4458 return NULL_TREE;
4459}
4460
56cf8686
SP
4461/* Search the data references in LOOP, and record the information into
4462 DATAREFS. Returns chrec_dont_know when failing to analyze a
4463 difficult case, returns NULL_TREE otherwise.
3cb960c7 4464
464f49d8
DB
4465 TODO: This function should be made smarter so that it can handle address
4466 arithmetic as if they were array accesses, etc. */
56cf8686 4467
3881dee9 4468static tree
ebf78a47 4469find_data_references_in_loop (struct loop *loop,
e14b10df 4470 VEC (data_reference_p, heap) **datarefs)
56cf8686 4471{
ccbdbf0a
JL
4472 basic_block bb, *bbs;
4473 unsigned int i;
86df10e3 4474
bbc8a8dc 4475 bbs = get_loop_body_in_dom_order (loop);
ccbdbf0a
JL
4476
4477 for (i = 0; i < loop->num_nodes; i++)
56cf8686 4478 {
ccbdbf0a
JL
4479 bb = bbs[i];
4480
a70d6342
IR
4481 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4482 {
4483 free (bbs);
4484 return chrec_dont_know;
4485 }
56cf8686 4486 }
ccbdbf0a
JL
4487 free (bbs);
4488
4aad410d 4489 return NULL_TREE;
56cf8686
SP
4490}
4491
ba42e045
SP
4492/* Recursive helper function. */
4493
4494static bool
3ac57120 4495find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
ba42e045
SP
4496{
4497 /* Inner loops of the nest should not contain siblings. Example:
4498 when there are two consecutive loops,
4499
4500 | loop_0
4501 | loop_1
4502 | A[{0, +, 1}_1]
4503 | endloop_1
4504 | loop_2
4505 | A[{0, +, 1}_2]
4506 | endloop_2
4507 | endloop_0
4508
4509 the dependence relation cannot be captured by the distance
4510 abstraction. */
4511 if (loop->next)
4512 return false;
56cf8686 4513
3ac57120 4514 VEC_safe_push (loop_p, heap, *loop_nest, loop);
ba42e045
SP
4515 if (loop->inner)
4516 return find_loop_nest_1 (loop->inner, loop_nest);
4517 return true;
4518}
4519
4520/* Return false when the LOOP is not well nested. Otherwise return
4521 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4522 contain the loops from the outermost to the innermost, as they will
4523 appear in the classic distance vector. */
4524
5417e022 4525bool
3ac57120 4526find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
ba42e045 4527{
3ac57120 4528 VEC_safe_push (loop_p, heap, *loop_nest, loop);
ba42e045
SP
4529 if (loop->inner)
4530 return find_loop_nest_1 (loop->inner, loop_nest);
4531 return true;
4532}
56cf8686 4533
9f275479
JS
4534/* Returns true when the data dependences have been computed, false otherwise.
4535 Given a loop nest LOOP, the following vectors are returned:
b8698a0f
L
4536 DATAREFS is initialized to all the array elements contained in this loop,
4537 DEPENDENCE_RELATIONS contains the relations between the data references.
4538 Compute read-read and self relations if
86a07404 4539 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
56cf8686 4540
9f275479 4541bool
b8698a0f 4542compute_data_dependences_for_loop (struct loop *loop,
86a07404 4543 bool compute_self_and_read_read_dependences,
01be8516 4544 VEC (loop_p, heap) **loop_nest,
e14b10df
SP
4545 VEC (data_reference_p, heap) **datarefs,
4546 VEC (ddr_p, heap) **dependence_relations)
56cf8686 4547{
9f275479 4548 bool res = true;
86a07404 4549
0ff4040e 4550 memset (&dependence_stats, 0, sizeof (dependence_stats));
56cf8686 4551
b8698a0f 4552 /* If the loop nest is not well formed, or one of the data references
ba42e045
SP
4553 is not computable, give up without spending time to compute other
4554 dependences. */
3cb960c7 4555 if (!loop
01be8516 4556 || !find_loop_nest (loop, loop_nest)
3881dee9
AB
4557 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4558 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4559 compute_self_and_read_read_dependences))
4560 res = false;
0ff4040e
SP
4561
4562 if (dump_file && (dump_flags & TDF_STATS))
56cf8686 4563 {
0ff4040e
SP
4564 fprintf (dump_file, "Dependence tester statistics:\n");
4565
b8698a0f 4566 fprintf (dump_file, "Number of dependence tests: %d\n",
0ff4040e 4567 dependence_stats.num_dependence_tests);
b8698a0f 4568 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
0ff4040e 4569 dependence_stats.num_dependence_dependent);
b8698a0f 4570 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
0ff4040e 4571 dependence_stats.num_dependence_independent);
b8698a0f 4572 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
0ff4040e
SP
4573 dependence_stats.num_dependence_undetermined);
4574
b8698a0f 4575 fprintf (dump_file, "Number of subscript tests: %d\n",
0ff4040e 4576 dependence_stats.num_subscript_tests);
b8698a0f 4577 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
0ff4040e 4578 dependence_stats.num_subscript_undetermined);
b8698a0f 4579 fprintf (dump_file, "Number of same subscript function: %d\n",
0ff4040e
SP
4580 dependence_stats.num_same_subscript_function);
4581
4582 fprintf (dump_file, "Number of ziv tests: %d\n",
4583 dependence_stats.num_ziv);
4584 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4585 dependence_stats.num_ziv_dependent);
4586 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4587 dependence_stats.num_ziv_independent);
4588 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
b8698a0f 4589 dependence_stats.num_ziv_unimplemented);
0ff4040e 4590
b8698a0f 4591 fprintf (dump_file, "Number of siv tests: %d\n",
0ff4040e
SP
4592 dependence_stats.num_siv);
4593 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4594 dependence_stats.num_siv_dependent);
4595 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4596 dependence_stats.num_siv_independent);
4597 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4598 dependence_stats.num_siv_unimplemented);
4599
b8698a0f 4600 fprintf (dump_file, "Number of miv tests: %d\n",
0ff4040e
SP
4601 dependence_stats.num_miv);
4602 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4603 dependence_stats.num_miv_dependent);
4604 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4605 dependence_stats.num_miv_independent);
4606 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4607 dependence_stats.num_miv_unimplemented);
9f275479
JS
4608 }
4609
4610 return res;
56cf8686
SP
4611}
4612
b8698a0f 4613/* Returns true when the data dependences for the basic block BB have been
a70d6342 4614 computed, false otherwise.
b8698a0f 4615 DATAREFS is initialized to all the array elements contained in this basic
a70d6342
IR
4616 block, DEPENDENCE_RELATIONS contains the relations between the data
4617 references. Compute read-read and self relations if
4618 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4619bool
4620compute_data_dependences_for_bb (basic_block bb,
4621 bool compute_self_and_read_read_dependences,
4622 VEC (data_reference_p, heap) **datarefs,
4623 VEC (ddr_p, heap) **dependence_relations)
4624{
4625 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4626 return false;
4627
3881dee9
AB
4628 return compute_all_dependences (*datarefs, dependence_relations, NULL,
4629 compute_self_and_read_read_dependences);
a70d6342
IR
4630}
4631
56cf8686 4632/* Entry point (for testing only). Analyze all the data references
3d8864c0 4633 and the dependence relations in LOOP.
56cf8686 4634
b8698a0f
L
4635 The data references are computed first.
4636
56cf8686
SP
4637 A relation on these nodes is represented by a complete graph. Some
4638 of the relations could be of no interest, thus the relations can be
4639 computed on demand.
b8698a0f 4640
56cf8686
SP
4641 In the following function we compute all the relations. This is
4642 just a first implementation that is here for:
b8698a0f 4643 - for showing how to ask for the dependence relations,
56cf8686
SP
4644 - for the debugging the whole dependence graph,
4645 - for the dejagnu testcases and maintenance.
b8698a0f 4646
56cf8686
SP
4647 It is possible to ask only for a part of the graph, avoiding to
4648 compute the whole dependence graph. The computed dependences are
4649 stored in a knowledge base (KB) such that later queries don't
4650 recompute the same information. The implementation of this KB is
4651 transparent to the optimizer, and thus the KB can be changed with a
4652 more efficient implementation, or the KB could be disabled. */
b8698a0f 4653static void
3d8864c0 4654analyze_all_data_dependences (struct loop *loop)
56cf8686
SP
4655{
4656 unsigned int i;
56cf8686 4657 int nb_data_refs = 10;
b8698a0f 4658 VEC (data_reference_p, heap) *datarefs =
ebf78a47 4659 VEC_alloc (data_reference_p, heap, nb_data_refs);
b8698a0f 4660 VEC (ddr_p, heap) *dependence_relations =
ebf78a47 4661 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
01be8516 4662 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
56cf8686
SP
4663
4664 /* Compute DDs on the whole function. */
01be8516 4665 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
3d8864c0 4666 &dependence_relations);
56cf8686
SP
4667
4668 if (dump_file)
4669 {
4670 dump_data_dependence_relations (dump_file, dependence_relations);
4671 fprintf (dump_file, "\n\n");
56cf8686 4672
86df10e3
SP
4673 if (dump_flags & TDF_DETAILS)
4674 dump_dist_dir_vectors (dump_file, dependence_relations);
56cf8686 4675
86df10e3 4676 if (dump_flags & TDF_STATS)
56cf8686 4677 {
86df10e3
SP
4678 unsigned nb_top_relations = 0;
4679 unsigned nb_bot_relations = 0;
86df10e3 4680 unsigned nb_chrec_relations = 0;
ebf78a47 4681 struct data_dependence_relation *ddr;
86df10e3 4682
ac47786e 4683 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
86df10e3 4684 {
86df10e3
SP
4685 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4686 nb_top_relations++;
b8698a0f 4687
86df10e3 4688 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5006671f 4689 nb_bot_relations++;
b8698a0f
L
4690
4691 else
86df10e3
SP
4692 nb_chrec_relations++;
4693 }
b8698a0f 4694
86df10e3
SP
4695 gather_stats_on_scev_database ();
4696 }
56cf8686 4697 }
36d59cf7 4698
01be8516 4699 VEC_free (loop_p, heap, loop_nest);
36d59cf7
DB
4700 free_dependence_relations (dependence_relations);
4701 free_data_refs (datarefs);
4702}
3d8864c0
SP
4703
4704/* Computes all the data dependences and check that the results of
4705 several analyzers are the same. */
4706
4707void
4708tree_check_data_deps (void)
4709{
4710 loop_iterator li;
4711 struct loop *loop_nest;
4712
4713 FOR_EACH_LOOP (li, loop_nest, 0)
4714 analyze_all_data_dependences (loop_nest);
4715}
36d59cf7
DB
4716
4717/* Free the memory used by a data dependence relation DDR. */
4718
4719void
4720free_dependence_relation (struct data_dependence_relation *ddr)
4721{
4722 if (ddr == NULL)
4723 return;
4724
2f470326 4725 if (DDR_SUBSCRIPTS (ddr))
d93817c4 4726 free_subscripts (DDR_SUBSCRIPTS (ddr));
2f470326
JJ
4727 if (DDR_DIST_VECTS (ddr))
4728 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4729 if (DDR_DIR_VECTS (ddr))
4730 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
ebf78a47 4731
36d59cf7
DB
4732 free (ddr);
4733}
4734
4735/* Free the memory used by the data dependence relations from
4736 DEPENDENCE_RELATIONS. */
4737
b8698a0f 4738void
ebf78a47 4739free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
36d59cf7
DB
4740{
4741 unsigned int i;
ebf78a47 4742 struct data_dependence_relation *ddr;
36d59cf7 4743
ac47786e 4744 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
01be8516 4745 if (ddr)
3ac57120 4746 free_dependence_relation (ddr);
ebf78a47
SP
4747
4748 VEC_free (ddr_p, heap, dependence_relations);
56cf8686
SP
4749}
4750
36d59cf7
DB
4751/* Free the memory used by the data references from DATAREFS. */
4752
4753void
ebf78a47 4754free_data_refs (VEC (data_reference_p, heap) *datarefs)
36d59cf7
DB
4755{
4756 unsigned int i;
ebf78a47 4757 struct data_reference *dr;
56cf8686 4758
ac47786e 4759 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
8fdbc9c6 4760 free_data_ref (dr);
ebf78a47 4761 VEC_free (data_reference_p, heap, datarefs);
36d59cf7 4762}
86df10e3 4763
3a796c6f
SP
4764\f
4765
dea61d92 4766/* Dump vertex I in RDG to FILE. */
3a796c6f 4767
aeb83f09 4768static void
dea61d92
SP
4769dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4770{
4771 struct vertex *v = &(rdg->vertices[i]);
4772 struct graph_edge *e;
4773
b8698a0f 4774 fprintf (file, "(vertex %d: (%s%s) (in:", i,
dea61d92
SP
4775 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4776 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4777
4778 if (v->pred)
4779 for (e = v->pred; e; e = e->pred_next)
4780 fprintf (file, " %d", e->src);
4781
4782 fprintf (file, ") (out:");
4783
4784 if (v->succ)
4785 for (e = v->succ; e; e = e->succ_next)
4786 fprintf (file, " %d", e->dest);
4787
cfee318d 4788 fprintf (file, ")\n");
726a989a 4789 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
dea61d92
SP
4790 fprintf (file, ")\n");
4791}
4792
4793/* Call dump_rdg_vertex on stderr. */
4794
24e47c76 4795DEBUG_FUNCTION void
dea61d92
SP
4796debug_rdg_vertex (struct graph *rdg, int i)
4797{
4798 dump_rdg_vertex (stderr, rdg, i);
4799}
4800
4801/* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4802 dumped vertices to that bitmap. */
4803
aeb83f09
RG
4804static void
4805dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
dea61d92
SP
4806{
4807 int i;
4808
4809 fprintf (file, "(%d\n", c);
4810
4811 for (i = 0; i < rdg->n_vertices; i++)
4812 if (rdg->vertices[i].component == c)
4813 {
4814 if (dumped)
4815 bitmap_set_bit (dumped, i);
4816
4817 dump_rdg_vertex (file, rdg, i);
4818 }
4819
4820 fprintf (file, ")\n");
4821}
4822
4823/* Call dump_rdg_vertex on stderr. */
4824
24e47c76 4825DEBUG_FUNCTION void
dea61d92
SP
4826debug_rdg_component (struct graph *rdg, int c)
4827{
4828 dump_rdg_component (stderr, rdg, c, NULL);
4829}
4830
4831/* Dump the reduced dependence graph RDG to FILE. */
4832
4833void
4834dump_rdg (FILE *file, struct graph *rdg)
3a796c6f
SP
4835{
4836 int i;
dea61d92
SP
4837 bitmap dumped = BITMAP_ALLOC (NULL);
4838
4839 fprintf (file, "(rdg\n");
3a796c6f
SP
4840
4841 for (i = 0; i < rdg->n_vertices; i++)
dea61d92
SP
4842 if (!bitmap_bit_p (dumped, i))
4843 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
3a796c6f 4844
dea61d92
SP
4845 fprintf (file, ")\n");
4846 BITMAP_FREE (dumped);
3a796c6f
SP
4847}
4848
dea61d92
SP
4849/* Call dump_rdg on stderr. */
4850
24e47c76 4851DEBUG_FUNCTION void
dea61d92
SP
4852debug_rdg (struct graph *rdg)
4853{
4854 dump_rdg (stderr, rdg);
4855}
3a796c6f 4856
f3241b29
SP
4857static void
4858dot_rdg_1 (FILE *file, struct graph *rdg)
4859{
4860 int i;
4861
4862 fprintf (file, "digraph RDG {\n");
4863
4864 for (i = 0; i < rdg->n_vertices; i++)
4865 {
4866 struct vertex *v = &(rdg->vertices[i]);
4867 struct graph_edge *e;
4868
4869 /* Highlight reads from memory. */
4870 if (RDG_MEM_READS_STMT (rdg, i))
4871 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4872
4873 /* Highlight stores to memory. */
4874 if (RDG_MEM_WRITE_STMT (rdg, i))
4875 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4876
4877 if (v->succ)
4878 for (e = v->succ; e; e = e->succ_next)
4879 switch (RDGE_TYPE (e))
4880 {
4881 case input_dd:
4882 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4883 break;
4884
4885 case output_dd:
4886 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4887 break;
4888
4889 case flow_dd:
4890 /* These are the most common dependences: don't print these. */
4891 fprintf (file, "%d -> %d \n", i, e->dest);
4892 break;
4893
4894 case anti_dd:
4895 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4896 break;
4897
4898 default:
4899 gcc_unreachable ();
4900 }
4901 }
4902
4903 fprintf (file, "}\n\n");
4904}
4905
4906/* Display the Reduced Dependence Graph using dotty. */
4907extern void dot_rdg (struct graph *);
4908
4909DEBUG_FUNCTION void
4910dot_rdg (struct graph *rdg)
4911{
4912 /* When debugging, enable the following code. This cannot be used
4913 in production compilers because it calls "system". */
4914#if 0
4915 FILE *file = fopen ("/tmp/rdg.dot", "w");
4916 gcc_assert (file != NULL);
4917
4918 dot_rdg_1 (file, rdg);
4919 fclose (file);
4920
4921 system ("dotty /tmp/rdg.dot &");
4922#else
4923 dot_rdg_1 (stderr, rdg);
4924#endif
4925}
4926
dea61d92
SP
4927/* This structure is used for recording the mapping statement index in
4928 the RDG. */
4929
d1b38208 4930struct GTY(()) rdg_vertex_info
dea61d92 4931{
726a989a 4932 gimple stmt;
dea61d92
SP
4933 int index;
4934};
4935
4936/* Returns the index of STMT in RDG. */
4937
4938int
726a989a 4939rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
dea61d92
SP
4940{
4941 struct rdg_vertex_info rvi, *slot;
4942
4943 rvi.stmt = stmt;
4944 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4945
4946 if (!slot)
4947 return -1;
4948
4949 return slot->index;
4950}
4951
4952/* Creates an edge in RDG for each distance vector from DDR. The
4953 order that we keep track of in the RDG is the order in which
4954 statements have to be executed. */
4955
4956static void
4957create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4958{
4959 struct graph_edge *e;
4960 int va, vb;
4961 data_reference_p dra = DDR_A (ddr);
4962 data_reference_p drb = DDR_B (ddr);
4963 unsigned level = ddr_dependence_level (ddr);
4964
4965 /* For non scalar dependences, when the dependence is REVERSED,
4966 statement B has to be executed before statement A. */
4967 if (level > 0
4968 && !DDR_REVERSED_P (ddr))
3a796c6f 4969 {
dea61d92
SP
4970 data_reference_p tmp = dra;
4971 dra = drb;
4972 drb = tmp;
3a796c6f
SP
4973 }
4974
dea61d92
SP
4975 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4976 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4977
4978 if (va < 0 || vb < 0)
4979 return;
3a796c6f
SP
4980
4981 e = add_edge (rdg, va, vb);
4982 e->data = XNEW (struct rdg_edge);
4983
dea61d92 4984 RDGE_LEVEL (e) = level;
f8bf9252 4985 RDGE_RELATION (e) = ddr;
dea61d92 4986
3a796c6f
SP
4987 /* Determines the type of the data dependence. */
4988 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4989 RDGE_TYPE (e) = input_dd;
b0af49c4 4990 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
3a796c6f 4991 RDGE_TYPE (e) = output_dd;
b0af49c4 4992 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
3a796c6f 4993 RDGE_TYPE (e) = flow_dd;
b0af49c4 4994 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
3a796c6f
SP
4995 RDGE_TYPE (e) = anti_dd;
4996}
4997
4998/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4999 the index of DEF in RDG. */
5000
5001static void
5002create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
5003{
5004 use_operand_p imm_use_p;
5005 imm_use_iterator iterator;
b8698a0f 5006
3a796c6f
SP
5007 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
5008 {
dea61d92
SP
5009 struct graph_edge *e;
5010 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
3a796c6f 5011
dea61d92
SP
5012 if (use < 0)
5013 continue;
5014
5015 e = add_edge (rdg, idef, use);
3a796c6f
SP
5016 e->data = XNEW (struct rdg_edge);
5017 RDGE_TYPE (e) = flow_dd;
f8bf9252 5018 RDGE_RELATION (e) = NULL;
3a796c6f
SP
5019 }
5020}
5021
5022/* Creates the edges of the reduced dependence graph RDG. */
5023
5024static void
5025create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
5026{
5027 int i;
5028 struct data_dependence_relation *ddr;
5029 def_operand_p def_p;
5030 ssa_op_iter iter;
5031
ac47786e 5032 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
3a796c6f
SP
5033 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5034 create_rdg_edge_for_ddr (rdg, ddr);
5035
5036 for (i = 0; i < rdg->n_vertices; i++)
dea61d92
SP
5037 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5038 iter, SSA_OP_DEF)
3a796c6f
SP
5039 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5040}
5041
5042/* Build the vertices of the reduced dependence graph RDG. */
5043
f8bf9252 5044void
726a989a 5045create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
3a796c6f 5046{
dea61d92 5047 int i, j;
726a989a 5048 gimple stmt;
3a796c6f 5049
ac47786e 5050 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
3a796c6f 5051 {
dea61d92
SP
5052 VEC (data_ref_loc, heap) *references;
5053 data_ref_loc *ref;
3a796c6f 5054 struct vertex *v = &(rdg->vertices[i]);
dea61d92
SP
5055 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
5056 struct rdg_vertex_info **slot;
5057
5058 rvi->stmt = stmt;
5059 rvi->index = i;
5060 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
5061
5062 if (!*slot)
5063 *slot = rvi;
5064 else
5065 free (rvi);
3a796c6f
SP
5066
5067 v->data = XNEW (struct rdg_vertex);
dea61d92
SP
5068 RDG_STMT (rdg, i) = stmt;
5069
5070 RDG_MEM_WRITE_STMT (rdg, i) = false;
5071 RDG_MEM_READS_STMT (rdg, i) = false;
726a989a 5072 if (gimple_code (stmt) == GIMPLE_PHI)
dea61d92
SP
5073 continue;
5074
5075 get_references_in_stmt (stmt, &references);
ac47786e 5076 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
dea61d92
SP
5077 if (!ref->is_read)
5078 RDG_MEM_WRITE_STMT (rdg, i) = true;
5079 else
5080 RDG_MEM_READS_STMT (rdg, i) = true;
5081
5082 VEC_free (data_ref_loc, heap, references);
3a796c6f
SP
5083 }
5084}
5085
dea61d92
SP
5086/* Initialize STMTS with all the statements of LOOP. When
5087 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5088 which we discover statements is important as
5089 generate_loops_for_partition is using the same traversal for
5090 identifying statements. */
3a796c6f
SP
5091
5092static void
726a989a 5093stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
3a796c6f
SP
5094{
5095 unsigned int i;
5096 basic_block *bbs = get_loop_body_in_dom_order (loop);
5097
5098 for (i = 0; i < loop->num_nodes; i++)
5099 {
3a796c6f 5100 basic_block bb = bbs[i];
726a989a
RB
5101 gimple_stmt_iterator bsi;
5102 gimple stmt;
3a796c6f 5103
726a989a
RB
5104 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5105 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
3a796c6f 5106
726a989a
RB
5107 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5108 {
5109 stmt = gsi_stmt (bsi);
b03c3082 5110 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
726a989a
RB
5111 VEC_safe_push (gimple, heap, *stmts, stmt);
5112 }
3a796c6f
SP
5113 }
5114
5115 free (bbs);
5116}
5117
5118/* Returns true when all the dependences are computable. */
5119
5120static bool
5121known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5122{
5123 ddr_p ddr;
5124 unsigned int i;
5125
ac47786e 5126 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
3a796c6f
SP
5127 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5128 return false;
b8698a0f 5129
3a796c6f
SP
5130 return true;
5131}
5132
dea61d92
SP
5133/* Computes a hash function for element ELT. */
5134
5135static hashval_t
5136hash_stmt_vertex_info (const void *elt)
5137{
1634b18f
KG
5138 const struct rdg_vertex_info *const rvi =
5139 (const struct rdg_vertex_info *) elt;
726a989a 5140 gimple stmt = rvi->stmt;
dea61d92
SP
5141
5142 return htab_hash_pointer (stmt);
5143}
5144
5145/* Compares database elements E1 and E2. */
5146
5147static int
5148eq_stmt_vertex_info (const void *e1, const void *e2)
5149{
5150 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5151 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5152
5153 return elt1->stmt == elt2->stmt;
5154}
5155
5156/* Free the element E. */
5157
5158static void
5159hash_stmt_vertex_del (void *e)
5160{
5161 free (e);
5162}
5163
f8bf9252
SP
5164/* Build the Reduced Dependence Graph (RDG) with one vertex per
5165 statement of the loop nest, and one edge per data dependence or
5166 scalar dependence. */
5167
5168struct graph *
5169build_empty_rdg (int n_stmts)
5170{
5171 int nb_data_refs = 10;
5172 struct graph *rdg = new_graph (n_stmts);
5173
5174 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5175 eq_stmt_vertex_info, hash_stmt_vertex_del);
5176 return rdg;
5177}
5178
dea61d92
SP
5179/* Build the Reduced Dependence Graph (RDG) with one vertex per
5180 statement of the loop nest, and one edge per data dependence or
5181 scalar dependence. */
3a796c6f
SP
5182
5183struct graph *
01be8516
SP
5184build_rdg (struct loop *loop,
5185 VEC (loop_p, heap) **loop_nest,
5186 VEC (ddr_p, heap) **dependence_relations,
5187 VEC (data_reference_p, heap) **datarefs)
3a796c6f 5188{
3a796c6f 5189 struct graph *rdg = NULL;
3a796c6f 5190
8a1da634
JJ
5191 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5192 dependence_relations)
5193 && known_dependences_p (*dependence_relations))
01be8516 5194 {
8a1da634 5195 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
01be8516
SP
5196 stmts_from_loop (loop, &stmts);
5197 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5198 create_rdg_vertices (rdg, stmts);
5199 create_rdg_edges (rdg, *dependence_relations);
8a1da634 5200 VEC_free (gimple, heap, stmts);
01be8516 5201 }
3a796c6f 5202
3a796c6f
SP
5203 return rdg;
5204}
dea61d92
SP
5205
5206/* Free the reduced dependence graph RDG. */
5207
5208void
5209free_rdg (struct graph *rdg)
5210{
5211 int i;
5212
5213 for (i = 0; i < rdg->n_vertices; i++)
01be8516
SP
5214 {
5215 struct vertex *v = &(rdg->vertices[i]);
5216 struct graph_edge *e;
5217
5218 for (e = v->succ; e; e = e->succ_next)
04695783 5219 free (e->data);
01be8516 5220
04695783 5221 free (v->data);
01be8516 5222 }
dea61d92
SP
5223
5224 htab_delete (rdg->indices);
5225 free_graph (rdg);
5226}
5227
5228/* Initialize STMTS with all the statements of LOOP that contain a
5229 store to memory. */
5230
5231void
726a989a 5232stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
dea61d92
SP
5233{
5234 unsigned int i;
5235 basic_block *bbs = get_loop_body_in_dom_order (loop);
5236
5237 for (i = 0; i < loop->num_nodes; i++)
5238 {
5239 basic_block bb = bbs[i];
726a989a 5240 gimple_stmt_iterator bsi;
dea61d92 5241
726a989a 5242 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5006671f 5243 if (gimple_vdef (gsi_stmt (bsi)))
726a989a 5244 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
dea61d92
SP
5245 }
5246
5247 free (bbs);
5248}
5249
cfee318d
SP
5250/* Returns true when the statement at STMT is of the form "A[i] = 0"
5251 that contains a data reference on its LHS with a stride of the same
5252 size as its unit type. */
5e37ea0e 5253
cfee318d
SP
5254bool
5255stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5e37ea0e 5256{
cfee318d 5257 tree op0, op1;
5e37ea0e 5258 bool res;
cfee318d
SP
5259 struct data_reference *dr;
5260
5261 if (!stmt
5262 || !gimple_vdef (stmt)
5263 || !is_gimple_assign (stmt)
5264 || !gimple_assign_single_p (stmt)
5265 || !(op1 = gimple_assign_rhs1 (stmt))
5266 || !(integer_zerop (op1) || real_zerop (op1)))
5267 return false;
5268
5269 dr = XCNEW (struct data_reference);
5270 op0 = gimple_assign_lhs (stmt);
5e37ea0e
SP
5271
5272 DR_STMT (dr) = stmt;
5273 DR_REF (dr) = op0;
5274
4e4452b6 5275 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5e37ea0e
SP
5276 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5277
5278 free_data_ref (dr);
5279 return res;
5280}
5281
20769d5e
SP
5282/* Initialize STMTS with all the statements of LOOP that contain a
5283 store to memory of the form "A[i] = 0". */
5284
5285void
5286stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5287{
5288 unsigned int i;
5289 basic_block bb;
5290 gimple_stmt_iterator si;
5291 gimple stmt;
20769d5e
SP
5292 basic_block *bbs = get_loop_body_in_dom_order (loop);
5293
5294 for (i = 0; i < loop->num_nodes; i++)
5295 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5296 if ((stmt = gsi_stmt (si))
cfee318d 5297 && stmt_with_adjacent_zero_store_dr_p (stmt))
20769d5e
SP
5298 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5299
5300 free (bbs);
5301}
5302
dea61d92
SP
5303/* For a data reference REF, return the declaration of its base
5304 address or NULL_TREE if the base is not determined. */
5305
5306static inline tree
726a989a 5307ref_base_address (gimple stmt, data_ref_loc *ref)
dea61d92
SP
5308{
5309 tree base = NULL_TREE;
5310 tree base_address;
5311 struct data_reference *dr = XCNEW (struct data_reference);
5312
5313 DR_STMT (dr) = stmt;
5314 DR_REF (dr) = *ref->pos;
4e4452b6 5315 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
dea61d92
SP
5316 base_address = DR_BASE_ADDRESS (dr);
5317
5318 if (!base_address)
5319 goto end;
5320
5321 switch (TREE_CODE (base_address))
5322 {
5323 case ADDR_EXPR:
5324 base = TREE_OPERAND (base_address, 0);
5325 break;
5326
5327 default:
5328 base = base_address;
5329 break;
5330 }
5331
5332 end:
5333 free_data_ref (dr);
5334 return base;
5335}
5336
5337/* Determines whether the statement from vertex V of the RDG has a
5338 definition used outside the loop that contains this statement. */
5339
5340bool
5341rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5342{
726a989a 5343 gimple stmt = RDG_STMT (rdg, v);
dea61d92
SP
5344 struct loop *loop = loop_containing_stmt (stmt);
5345 use_operand_p imm_use_p;
5346 imm_use_iterator iterator;
5347 ssa_op_iter it;
5348 def_operand_p def_p;
5349
5350 if (!loop)
5351 return true;
5352
5353 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5354 {
5355 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5356 {
5357 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5358 return true;
5359 }
5360 }
5361
5362 return false;
5363}
5364
5365/* Determines whether statements S1 and S2 access to similar memory
5366 locations. Two memory accesses are considered similar when they
5367 have the same base address declaration, i.e. when their
5368 ref_base_address is the same. */
5369
5370bool
726a989a 5371have_similar_memory_accesses (gimple s1, gimple s2)
dea61d92
SP
5372{
5373 bool res = false;
5374 unsigned i, j;
5375 VEC (data_ref_loc, heap) *refs1, *refs2;
5376 data_ref_loc *ref1, *ref2;
5377
5378 get_references_in_stmt (s1, &refs1);
5379 get_references_in_stmt (s2, &refs2);
5380
ac47786e 5381 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
dea61d92
SP
5382 {
5383 tree base1 = ref_base_address (s1, ref1);
5384
5385 if (base1)
ac47786e 5386 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
dea61d92
SP
5387 if (base1 == ref_base_address (s2, ref2))
5388 {
5389 res = true;
5390 goto end;
5391 }
5392 }
5393
5394 end:
5395 VEC_free (data_ref_loc, heap, refs1);
5396 VEC_free (data_ref_loc, heap, refs2);
5397 return res;
5398}
5399
5400/* Helper function for the hashtab. */
5401
5402static int
5403have_similar_memory_accesses_1 (const void *s1, const void *s2)
5404{
726a989a
RB
5405 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5406 CONST_CAST_GIMPLE ((const_gimple) s2));
dea61d92
SP
5407}
5408
5409/* Helper function for the hashtab. */
5410
5411static hashval_t
5412ref_base_address_1 (const void *s)
5413{
726a989a 5414 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
dea61d92
SP
5415 unsigned i;
5416 VEC (data_ref_loc, heap) *refs;
5417 data_ref_loc *ref;
5418 hashval_t res = 0;
5419
5420 get_references_in_stmt (stmt, &refs);
5421
ac47786e 5422 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
dea61d92
SP
5423 if (!ref->is_read)
5424 {
5425 res = htab_hash_pointer (ref_base_address (stmt, ref));
5426 break;
5427 }
5428
5429 VEC_free (data_ref_loc, heap, refs);
5430 return res;
5431}
5432
5433/* Try to remove duplicated write data references from STMTS. */
5434
5435void
726a989a 5436remove_similar_memory_refs (VEC (gimple, heap) **stmts)
dea61d92
SP
5437{
5438 unsigned i;
726a989a
RB
5439 gimple stmt;
5440 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
dea61d92
SP
5441 have_similar_memory_accesses_1, NULL);
5442
726a989a 5443 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
dea61d92
SP
5444 {
5445 void **slot;
5446
5447 slot = htab_find_slot (seen, stmt, INSERT);
5448
5449 if (*slot)
726a989a 5450 VEC_ordered_remove (gimple, *stmts, i);
dea61d92
SP
5451 else
5452 {
5453 *slot = (void *) stmt;
5454 i++;
5455 }
5456 }
5457
5458 htab_delete (seen);
5459}
5460