]>
Commit | Line | Data |
---|---|---|
56cf8686 | 1 | /* Data references and dependences detectors. |
ad616de1 | 2 | Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. |
56cf8686 SP |
3 | Contributed by Sebastian Pop <s.pop@laposte.net> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* This pass walks a given loop structure searching for array | |
23 | references. The information about the array accesses is recorded | |
24 | in DATA_REFERENCE structures. | |
25 | ||
26 | The basic test for determining the dependences is: | |
27 | given two access functions chrec1 and chrec2 to a same array, and | |
28 | x and y two vectors from the iteration domain, the same element of | |
29 | the array is accessed twice at iterations x and y if and only if: | |
30 | | chrec1 (x) == chrec2 (y). | |
31 | ||
32 | The goals of this analysis are: | |
33 | ||
34 | - to determine the independence: the relation between two | |
35 | independent accesses is qualified with the chrec_known (this | |
36 | information allows a loop parallelization), | |
37 | ||
38 | - when two data references access the same data, to qualify the | |
39 | dependence relation with classic dependence representations: | |
40 | ||
41 | - distance vectors | |
42 | - direction vectors | |
43 | - loop carried level dependence | |
44 | - polyhedron dependence | |
45 | or with the chains of recurrences based representation, | |
46 | ||
50300b4c | 47 | - to define a knowledge base for storing the data dependence |
56cf8686 SP |
48 | information, |
49 | ||
50 | - to define an interface to access this data. | |
51 | ||
52 | ||
53 | Definitions: | |
54 | ||
55 | - subscript: given two array accesses a subscript is the tuple | |
56 | composed of the access functions for a given dimension. Example: | |
57 | Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts: | |
58 | (f1, g1), (f2, g2), (f3, g3). | |
59 | ||
60 | - Diophantine equation: an equation whose coefficients and | |
61 | solutions are integer constants, for example the equation | |
62 | | 3*x + 2*y = 1 | |
63 | has an integer solution x = 1 and y = -1. | |
64 | ||
65 | References: | |
66 | ||
67 | - "Advanced Compilation for High Performance Computing" by Randy | |
68 | Allen and Ken Kennedy. | |
69 | http://citeseer.ist.psu.edu/goff91practical.html | |
70 | ||
71 | - "Loop Transformations for Restructuring Compilers - The Foundations" | |
72 | by Utpal Banerjee. | |
73 | ||
74 | ||
75 | */ | |
76 | ||
77 | #include "config.h" | |
78 | #include "system.h" | |
79 | #include "coretypes.h" | |
80 | #include "tm.h" | |
81 | #include "errors.h" | |
82 | #include "ggc.h" | |
83 | #include "tree.h" | |
84 | ||
85 | /* These RTL headers are needed for basic-block.h. */ | |
86 | #include "rtl.h" | |
87 | #include "basic-block.h" | |
88 | #include "diagnostic.h" | |
89 | #include "tree-flow.h" | |
90 | #include "tree-dump.h" | |
91 | #include "timevar.h" | |
92 | #include "cfgloop.h" | |
93 | #include "tree-chrec.h" | |
94 | #include "tree-data-ref.h" | |
95 | #include "tree-scalar-evolution.h" | |
96 | #include "tree-pass.h" | |
56cf8686 | 97 | |
79fe1b3b | 98 | /* This is the simplest data dependence test: determines whether the |
68b26d5c SP |
99 | data references A and B access the same array/region. Returns |
100 | false when the property is not computable at compile time. | |
101 | Otherwise return true, and DIFFER_P will record the result. This | |
102 | utility will not be necessary when alias_sets_conflict_p will be | |
103 | less conservative. */ | |
79fe1b3b DN |
104 | |
105 | bool | |
106 | array_base_name_differ_p (struct data_reference *a, | |
107 | struct data_reference *b, | |
108 | bool *differ_p) | |
109 | { | |
110 | tree base_a = DR_BASE_NAME (a); | |
111 | tree base_b = DR_BASE_NAME (b); | |
112 | tree ta = TREE_TYPE (base_a); | |
113 | tree tb = TREE_TYPE (base_b); | |
114 | ||
86df10e3 | 115 | |
68b26d5c SP |
116 | /* Determine if same base. Example: for the array accesses |
117 | a[i], b[i] or pointer accesses *a, *b, bases are a, b. */ | |
79fe1b3b DN |
118 | if (base_a == base_b) |
119 | { | |
120 | *differ_p = false; | |
121 | return true; | |
122 | } | |
123 | ||
68b26d5c SP |
124 | /* For pointer based accesses, (*p)[i], (*q)[j], the bases are (*p) |
125 | and (*q) */ | |
79fe1b3b DN |
126 | if (TREE_CODE (base_a) == INDIRECT_REF && TREE_CODE (base_b) == INDIRECT_REF |
127 | && TREE_OPERAND (base_a, 0) == TREE_OPERAND (base_b, 0)) | |
128 | { | |
129 | *differ_p = false; | |
130 | return true; | |
131 | } | |
132 | ||
86df10e3 | 133 | /* Record/union based accesses - s.a[i], t.b[j]. bases are s.a,t.b. */ |
79fe1b3b DN |
134 | if (TREE_CODE (base_a) == COMPONENT_REF && TREE_CODE (base_b) == COMPONENT_REF |
135 | && TREE_OPERAND (base_a, 0) == TREE_OPERAND (base_b, 0) | |
136 | && TREE_OPERAND (base_a, 1) == TREE_OPERAND (base_b, 1)) | |
137 | { | |
138 | *differ_p = false; | |
139 | return true; | |
140 | } | |
141 | ||
86df10e3 | 142 | |
68b26d5c | 143 | /* Determine if different bases. */ |
79fe1b3b | 144 | |
68b26d5c | 145 | /* At this point we know that base_a != base_b. However, pointer |
59c4456e KH |
146 | accesses of the form x=(*p) and y=(*q), whose bases are p and q, |
147 | may still be pointing to the same base. In SSAed GIMPLE p and q will | |
148 | be SSA_NAMES in this case. Therefore, here we check if they are | |
149 | really two different declarations. */ | |
79fe1b3b DN |
150 | if (TREE_CODE (base_a) == VAR_DECL && TREE_CODE (base_b) == VAR_DECL) |
151 | { | |
152 | *differ_p = true; | |
153 | return true; | |
154 | } | |
155 | ||
68b26d5c SP |
156 | /* Compare two record/union bases s.a and t.b: s != t or (a != b and |
157 | s and t are not unions). */ | |
79fe1b3b DN |
158 | if (TREE_CODE (base_a) == COMPONENT_REF && TREE_CODE (base_b) == COMPONENT_REF |
159 | && ((TREE_CODE (TREE_OPERAND (base_a, 0)) == VAR_DECL | |
160 | && TREE_CODE (TREE_OPERAND (base_b, 0)) == VAR_DECL | |
161 | && TREE_OPERAND (base_a, 0) != TREE_OPERAND (base_b, 0)) | |
162 | || (TREE_CODE (TREE_TYPE (TREE_OPERAND (base_a, 0))) == RECORD_TYPE | |
163 | && TREE_CODE (TREE_TYPE (TREE_OPERAND (base_b, 0))) == RECORD_TYPE | |
164 | && TREE_OPERAND (base_a, 1) != TREE_OPERAND (base_b, 1)))) | |
165 | { | |
166 | *differ_p = true; | |
167 | return true; | |
168 | } | |
169 | ||
68b26d5c | 170 | /* Compare a record/union access and an array access. */ |
79fe1b3b DN |
171 | if ((TREE_CODE (base_a) == VAR_DECL |
172 | && (TREE_CODE (base_b) == COMPONENT_REF | |
173 | && TREE_CODE (TREE_OPERAND (base_b, 0)) == VAR_DECL)) | |
174 | || (TREE_CODE (base_b) == VAR_DECL | |
175 | && (TREE_CODE (base_a) == COMPONENT_REF | |
176 | && TREE_CODE (TREE_OPERAND (base_a, 0)) == VAR_DECL))) | |
177 | { | |
178 | *differ_p = true; | |
179 | return true; | |
180 | } | |
181 | ||
182 | if (!alias_sets_conflict_p (get_alias_set (base_a), get_alias_set (base_b))) | |
183 | { | |
184 | *differ_p = true; | |
185 | return true; | |
186 | } | |
187 | ||
68b26d5c SP |
188 | /* An instruction writing through a restricted pointer is |
189 | "independent" of any instruction reading or writing through a | |
190 | different pointer, in the same block/scope. */ | |
79fe1b3b DN |
191 | if ((TREE_CODE (ta) == POINTER_TYPE && TYPE_RESTRICT (ta) |
192 | && !DR_IS_READ(a)) | |
193 | || (TREE_CODE (tb) == POINTER_TYPE && TYPE_RESTRICT (tb) | |
194 | && !DR_IS_READ(b))) | |
195 | { | |
196 | *differ_p = true; | |
197 | return true; | |
198 | } | |
199 | ||
79fe1b3b DN |
200 | return false; |
201 | } | |
56cf8686 SP |
202 | |
203 | /* Returns true iff A divides B. */ | |
204 | ||
205 | static inline bool | |
206 | tree_fold_divides_p (tree type, | |
207 | tree a, | |
208 | tree b) | |
209 | { | |
56cf8686 SP |
210 | /* Determines whether (A == gcd (A, B)). */ |
211 | return integer_zerop | |
212 | (fold (build (MINUS_EXPR, type, a, tree_fold_gcd (a, b)))); | |
213 | } | |
214 | ||
86df10e3 SP |
215 | /* Compute the greatest common denominator of two numbers using |
216 | Euclid's algorithm. */ | |
56cf8686 | 217 | |
86df10e3 SP |
218 | static int |
219 | gcd (int a, int b) | |
56cf8686 | 220 | { |
56cf8686 | 221 | |
86df10e3 | 222 | int x, y, z; |
56cf8686 | 223 | |
86df10e3 SP |
224 | x = abs (a); |
225 | y = abs (b); | |
226 | ||
227 | while (x>0) | |
56cf8686 | 228 | { |
86df10e3 SP |
229 | z = y % x; |
230 | y = x; | |
231 | x = z; | |
56cf8686 | 232 | } |
86df10e3 SP |
233 | |
234 | return (y); | |
235 | } | |
236 | ||
237 | /* Returns true iff A divides B. */ | |
238 | ||
239 | static inline bool | |
240 | int_divides_p (int a, int b) | |
241 | { | |
242 | return ((b % a) == 0); | |
56cf8686 SP |
243 | } |
244 | ||
245 | \f | |
246 | ||
247 | /* Dump into FILE all the data references from DATAREFS. */ | |
248 | ||
249 | void | |
250 | dump_data_references (FILE *file, | |
251 | varray_type datarefs) | |
252 | { | |
253 | unsigned int i; | |
254 | ||
255 | for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) | |
256 | dump_data_reference (file, VARRAY_GENERIC_PTR (datarefs, i)); | |
257 | } | |
258 | ||
259 | /* Dump into FILE all the dependence relations from DDR. */ | |
260 | ||
261 | void | |
262 | dump_data_dependence_relations (FILE *file, | |
263 | varray_type ddr) | |
264 | { | |
265 | unsigned int i; | |
266 | ||
267 | for (i = 0; i < VARRAY_ACTIVE_SIZE (ddr); i++) | |
268 | dump_data_dependence_relation (file, VARRAY_GENERIC_PTR (ddr, i)); | |
269 | } | |
270 | ||
271 | /* Dump function for a DATA_REFERENCE structure. */ | |
272 | ||
273 | void | |
274 | dump_data_reference (FILE *outf, | |
275 | struct data_reference *dr) | |
276 | { | |
277 | unsigned int i; | |
278 | ||
36d59cf7 | 279 | fprintf (outf, "(Data Ref: \n stmt: "); |
56cf8686 SP |
280 | print_generic_stmt (outf, DR_STMT (dr), 0); |
281 | fprintf (outf, " ref: "); | |
282 | print_generic_stmt (outf, DR_REF (dr), 0); | |
283 | fprintf (outf, " base_name: "); | |
284 | print_generic_stmt (outf, DR_BASE_NAME (dr), 0); | |
285 | ||
286 | for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) | |
287 | { | |
288 | fprintf (outf, " Access function %d: ", i); | |
289 | print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0); | |
290 | } | |
291 | fprintf (outf, ")\n"); | |
292 | } | |
293 | ||
86df10e3 SP |
294 | /* Dump function for a SUBSCRIPT structure. */ |
295 | ||
296 | void | |
297 | dump_subscript (FILE *outf, struct subscript *subscript) | |
298 | { | |
299 | tree chrec = SUB_CONFLICTS_IN_A (subscript); | |
300 | ||
301 | fprintf (outf, "\n (subscript \n"); | |
302 | fprintf (outf, " iterations_that_access_an_element_twice_in_A: "); | |
303 | print_generic_stmt (outf, chrec, 0); | |
304 | if (chrec == chrec_known) | |
305 | fprintf (outf, " (no dependence)\n"); | |
306 | else if (chrec_contains_undetermined (chrec)) | |
307 | fprintf (outf, " (don't know)\n"); | |
308 | else | |
309 | { | |
310 | tree last_iteration = SUB_LAST_CONFLICT (subscript); | |
311 | fprintf (outf, " last_conflict: "); | |
312 | print_generic_stmt (outf, last_iteration, 0); | |
313 | } | |
314 | ||
315 | chrec = SUB_CONFLICTS_IN_B (subscript); | |
316 | fprintf (outf, " iterations_that_access_an_element_twice_in_B: "); | |
317 | print_generic_stmt (outf, chrec, 0); | |
318 | if (chrec == chrec_known) | |
319 | fprintf (outf, " (no dependence)\n"); | |
320 | else if (chrec_contains_undetermined (chrec)) | |
321 | fprintf (outf, " (don't know)\n"); | |
322 | else | |
323 | { | |
324 | tree last_iteration = SUB_LAST_CONFLICT (subscript); | |
325 | fprintf (outf, " last_conflict: "); | |
326 | print_generic_stmt (outf, last_iteration, 0); | |
327 | } | |
328 | ||
329 | fprintf (outf, " (Subscript distance: "); | |
330 | print_generic_stmt (outf, SUB_DISTANCE (subscript), 0); | |
331 | fprintf (outf, " )\n"); | |
332 | fprintf (outf, " )\n"); | |
333 | } | |
334 | ||
56cf8686 SP |
335 | /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ |
336 | ||
337 | void | |
338 | dump_data_dependence_relation (FILE *outf, | |
339 | struct data_dependence_relation *ddr) | |
340 | { | |
56cf8686 | 341 | struct data_reference *dra, *drb; |
86df10e3 | 342 | |
56cf8686 SP |
343 | dra = DDR_A (ddr); |
344 | drb = DDR_B (ddr); | |
86df10e3 SP |
345 | fprintf (outf, "(Data Dep: \n"); |
346 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
56cf8686 SP |
347 | fprintf (outf, " (don't know)\n"); |
348 | ||
349 | else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
350 | fprintf (outf, " (no dependence)\n"); | |
351 | ||
86df10e3 | 352 | else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) |
56cf8686 | 353 | { |
86df10e3 | 354 | unsigned int i; |
56cf8686 SP |
355 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
356 | { | |
56cf8686 SP |
357 | fprintf (outf, " access_fn_A: "); |
358 | print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0); | |
359 | fprintf (outf, " access_fn_B: "); | |
360 | print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0); | |
86df10e3 | 361 | dump_subscript (outf, DDR_SUBSCRIPT (ddr, i)); |
56cf8686 | 362 | } |
86df10e3 | 363 | if (DDR_DIST_VECT (ddr)) |
56cf8686 | 364 | { |
86df10e3 SP |
365 | fprintf (outf, " distance_vect: "); |
366 | print_lambda_vector (outf, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); | |
367 | } | |
368 | if (DDR_DIR_VECT (ddr)) | |
369 | { | |
370 | fprintf (outf, " direction_vect: "); | |
371 | print_lambda_vector (outf, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); | |
56cf8686 | 372 | } |
56cf8686 SP |
373 | } |
374 | ||
375 | fprintf (outf, ")\n"); | |
376 | } | |
377 | ||
378 | ||
379 | ||
380 | /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */ | |
381 | ||
382 | void | |
383 | dump_data_dependence_direction (FILE *file, | |
384 | enum data_dependence_direction dir) | |
385 | { | |
386 | switch (dir) | |
387 | { | |
388 | case dir_positive: | |
389 | fprintf (file, "+"); | |
390 | break; | |
391 | ||
392 | case dir_negative: | |
393 | fprintf (file, "-"); | |
394 | break; | |
395 | ||
396 | case dir_equal: | |
397 | fprintf (file, "="); | |
398 | break; | |
399 | ||
400 | case dir_positive_or_negative: | |
401 | fprintf (file, "+-"); | |
402 | break; | |
403 | ||
404 | case dir_positive_or_equal: | |
405 | fprintf (file, "+="); | |
406 | break; | |
407 | ||
408 | case dir_negative_or_equal: | |
409 | fprintf (file, "-="); | |
410 | break; | |
411 | ||
412 | case dir_star: | |
413 | fprintf (file, "*"); | |
414 | break; | |
415 | ||
416 | default: | |
417 | break; | |
418 | } | |
419 | } | |
420 | ||
86df10e3 SP |
421 | /* Dumps the distance and direction vectors in FILE. DDRS contains |
422 | the dependence relations, and VECT_SIZE is the size of the | |
423 | dependence vectors, or in other words the number of loops in the | |
424 | considered nest. */ | |
425 | ||
426 | void | |
427 | dump_dist_dir_vectors (FILE *file, varray_type ddrs) | |
428 | { | |
429 | unsigned int i; | |
430 | ||
431 | for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++) | |
432 | { | |
433 | struct data_dependence_relation *ddr = | |
434 | (struct data_dependence_relation *) | |
435 | VARRAY_GENERIC_PTR (ddrs, i); | |
436 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE | |
437 | && DDR_AFFINE_P (ddr)) | |
438 | { | |
439 | fprintf (file, "DISTANCE_V ("); | |
440 | print_lambda_vector (file, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); | |
441 | fprintf (file, ")\n"); | |
442 | fprintf (file, "DIRECTION_V ("); | |
443 | print_lambda_vector (file, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); | |
444 | fprintf (file, ")\n"); | |
445 | } | |
446 | } | |
447 | fprintf (file, "\n\n"); | |
448 | } | |
449 | ||
450 | /* Dumps the data dependence relations DDRS in FILE. */ | |
451 | ||
452 | void | |
453 | dump_ddrs (FILE *file, varray_type ddrs) | |
454 | { | |
455 | unsigned int i; | |
456 | ||
457 | for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++) | |
458 | { | |
459 | struct data_dependence_relation *ddr = | |
460 | (struct data_dependence_relation *) | |
461 | VARRAY_GENERIC_PTR (ddrs, i); | |
462 | dump_data_dependence_relation (file, ddr); | |
463 | } | |
464 | fprintf (file, "\n\n"); | |
465 | } | |
466 | ||
56cf8686 SP |
467 | \f |
468 | ||
86df10e3 SP |
469 | /* Compute the lowest iteration bound for LOOP. It is an |
470 | INTEGER_CST. */ | |
471 | ||
472 | static void | |
473 | compute_estimated_nb_iterations (struct loop *loop) | |
474 | { | |
475 | tree estimation; | |
476 | struct nb_iter_bound *bound, *next; | |
477 | ||
478 | for (bound = loop->bounds; bound; bound = next) | |
479 | { | |
480 | next = bound->next; | |
481 | estimation = bound->bound; | |
482 | ||
483 | if (TREE_CODE (estimation) != INTEGER_CST) | |
484 | continue; | |
485 | ||
486 | if (loop->estimated_nb_iterations) | |
487 | { | |
488 | /* Update only if estimation is smaller. */ | |
489 | if (tree_int_cst_lt (estimation, loop->estimated_nb_iterations)) | |
490 | loop->estimated_nb_iterations = estimation; | |
491 | } | |
492 | else | |
493 | loop->estimated_nb_iterations = estimation; | |
494 | } | |
495 | } | |
496 | ||
497 | /* Estimate the number of iterations from the size of the data and the | |
498 | access functions. */ | |
499 | ||
500 | static void | |
501 | estimate_niter_from_size_of_data (struct loop *loop, | |
502 | tree opnd0, | |
503 | tree access_fn, | |
504 | tree stmt) | |
505 | { | |
506 | tree estimation; | |
507 | tree array_size, data_size, element_size; | |
508 | tree init, step; | |
509 | ||
510 | init = initial_condition (access_fn); | |
511 | step = evolution_part_in_loop_num (access_fn, loop->num); | |
512 | ||
513 | array_size = TYPE_SIZE (TREE_TYPE (opnd0)); | |
514 | element_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (opnd0))); | |
515 | if (array_size == NULL_TREE | |
9ba64a4a SP |
516 | || TREE_CODE (array_size) != INTEGER_CST |
517 | || TREE_CODE (element_size) != INTEGER_CST) | |
86df10e3 SP |
518 | return; |
519 | ||
520 | data_size = fold (build2 (EXACT_DIV_EXPR, integer_type_node, | |
9ba64a4a | 521 | array_size, element_size)); |
86df10e3 SP |
522 | |
523 | if (init != NULL_TREE | |
524 | && step != NULL_TREE | |
525 | && TREE_CODE (init) == INTEGER_CST | |
526 | && TREE_CODE (step) == INTEGER_CST) | |
527 | { | |
528 | estimation = fold (build2 (CEIL_DIV_EXPR, integer_type_node, | |
529 | fold (build2 (MINUS_EXPR, integer_type_node, | |
530 | data_size, init)), step)); | |
531 | ||
532 | record_estimate (loop, estimation, boolean_true_node, stmt); | |
533 | } | |
534 | } | |
535 | ||
56cf8686 SP |
536 | /* Given an ARRAY_REF node REF, records its access functions. |
537 | Example: given A[i][3], record in ACCESS_FNS the opnd1 function, | |
89dbed81 KH |
538 | i.e. the constant "3", then recursively call the function on opnd0, |
539 | i.e. the ARRAY_REF "A[i]". The function returns the base name: | |
56cf8686 SP |
540 | "A". */ |
541 | ||
542 | static tree | |
543 | analyze_array_indexes (struct loop *loop, | |
6775f1f3 | 544 | varray_type *access_fns, |
86df10e3 | 545 | tree ref, tree stmt) |
56cf8686 SP |
546 | { |
547 | tree opnd0, opnd1; | |
548 | tree access_fn; | |
549 | ||
550 | opnd0 = TREE_OPERAND (ref, 0); | |
551 | opnd1 = TREE_OPERAND (ref, 1); | |
552 | ||
553 | /* The detection of the evolution function for this data access is | |
554 | postponed until the dependence test. This lazy strategy avoids | |
555 | the computation of access functions that are of no interest for | |
556 | the optimizers. */ | |
557 | access_fn = instantiate_parameters | |
558 | (loop, analyze_scalar_evolution (loop, opnd1)); | |
86df10e3 SP |
559 | |
560 | if (loop->estimated_nb_iterations == NULL_TREE) | |
561 | estimate_niter_from_size_of_data (loop, opnd0, access_fn, stmt); | |
56cf8686 | 562 | |
6775f1f3 | 563 | VARRAY_PUSH_TREE (*access_fns, access_fn); |
56cf8686 SP |
564 | |
565 | /* Recursively record other array access functions. */ | |
566 | if (TREE_CODE (opnd0) == ARRAY_REF) | |
86df10e3 | 567 | return analyze_array_indexes (loop, access_fns, opnd0, stmt); |
56cf8686 SP |
568 | |
569 | /* Return the base name of the data access. */ | |
570 | else | |
571 | return opnd0; | |
572 | } | |
573 | ||
6cb38cd4 | 574 | /* For a data reference REF contained in the statement STMT, initialize |
56cf8686 SP |
575 | a DATA_REFERENCE structure, and return it. IS_READ flag has to be |
576 | set to true when REF is in the right hand side of an | |
577 | assignment. */ | |
578 | ||
579 | struct data_reference * | |
580 | analyze_array (tree stmt, tree ref, bool is_read) | |
581 | { | |
582 | struct data_reference *res; | |
583 | ||
584 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
585 | { | |
586 | fprintf (dump_file, "(analyze_array \n"); | |
587 | fprintf (dump_file, " (ref = "); | |
588 | print_generic_stmt (dump_file, ref, 0); | |
589 | fprintf (dump_file, ")\n"); | |
590 | } | |
591 | ||
36d59cf7 | 592 | res = xmalloc (sizeof (struct data_reference)); |
56cf8686 | 593 | |
56cf8686 SP |
594 | DR_STMT (res) = stmt; |
595 | DR_REF (res) = ref; | |
596 | VARRAY_TREE_INIT (DR_ACCESS_FNS (res), 3, "access_fns"); | |
597 | DR_BASE_NAME (res) = analyze_array_indexes | |
86df10e3 | 598 | (loop_containing_stmt (stmt), &(DR_ACCESS_FNS (res)), ref, stmt); |
56cf8686 SP |
599 | DR_IS_READ (res) = is_read; |
600 | ||
601 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
602 | fprintf (dump_file, ")\n"); | |
603 | ||
604 | return res; | |
605 | } | |
606 | ||
6cb38cd4 | 607 | /* For a data reference REF contained in the statement STMT, initialize |
56cf8686 SP |
608 | a DATA_REFERENCE structure, and return it. */ |
609 | ||
610 | struct data_reference * | |
611 | init_data_ref (tree stmt, | |
612 | tree ref, | |
613 | tree base, | |
79fe1b3b DN |
614 | tree access_fn, |
615 | bool is_read) | |
56cf8686 SP |
616 | { |
617 | struct data_reference *res; | |
618 | ||
619 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
620 | { | |
621 | fprintf (dump_file, "(init_data_ref \n"); | |
622 | fprintf (dump_file, " (ref = "); | |
623 | print_generic_stmt (dump_file, ref, 0); | |
624 | fprintf (dump_file, ")\n"); | |
625 | } | |
626 | ||
36d59cf7 | 627 | res = xmalloc (sizeof (struct data_reference)); |
56cf8686 | 628 | |
56cf8686 SP |
629 | DR_STMT (res) = stmt; |
630 | DR_REF (res) = ref; | |
631 | VARRAY_TREE_INIT (DR_ACCESS_FNS (res), 5, "access_fns"); | |
632 | DR_BASE_NAME (res) = base; | |
633 | VARRAY_PUSH_TREE (DR_ACCESS_FNS (res), access_fn); | |
79fe1b3b | 634 | DR_IS_READ (res) = is_read; |
56cf8686 SP |
635 | |
636 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
637 | fprintf (dump_file, ")\n"); | |
638 | ||
639 | return res; | |
640 | } | |
641 | ||
642 | \f | |
643 | ||
86df10e3 SP |
644 | /* Returns true when all the functions of a tree_vec CHREC are the |
645 | same. */ | |
646 | ||
647 | static bool | |
648 | all_chrecs_equal_p (tree chrec) | |
649 | { | |
650 | int j; | |
651 | ||
652 | for (j = 0; j < TREE_VEC_LENGTH (chrec) - 1; j++) | |
653 | { | |
654 | tree chrec_j = TREE_VEC_ELT (chrec, j); | |
655 | tree chrec_j_1 = TREE_VEC_ELT (chrec, j + 1); | |
656 | if (!integer_zerop | |
657 | (chrec_fold_minus | |
658 | (integer_type_node, chrec_j, chrec_j_1))) | |
659 | return false; | |
660 | } | |
661 | return true; | |
662 | } | |
663 | ||
664 | /* Determine for each subscript in the data dependence relation DDR | |
665 | the distance. */ | |
56cf8686 SP |
666 | |
667 | static void | |
86df10e3 | 668 | compute_subscript_distance (struct data_dependence_relation *ddr) |
56cf8686 SP |
669 | { |
670 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
671 | { | |
672 | unsigned int i; | |
673 | ||
674 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) | |
675 | { | |
676 | tree conflicts_a, conflicts_b, difference; | |
677 | struct subscript *subscript; | |
678 | ||
679 | subscript = DDR_SUBSCRIPT (ddr, i); | |
680 | conflicts_a = SUB_CONFLICTS_IN_A (subscript); | |
681 | conflicts_b = SUB_CONFLICTS_IN_B (subscript); | |
86df10e3 SP |
682 | |
683 | if (TREE_CODE (conflicts_a) == TREE_VEC) | |
684 | { | |
685 | if (!all_chrecs_equal_p (conflicts_a)) | |
686 | { | |
687 | SUB_DISTANCE (subscript) = chrec_dont_know; | |
688 | return; | |
689 | } | |
690 | else | |
691 | conflicts_a = TREE_VEC_ELT (conflicts_a, 0); | |
692 | } | |
693 | ||
694 | if (TREE_CODE (conflicts_b) == TREE_VEC) | |
695 | { | |
696 | if (!all_chrecs_equal_p (conflicts_b)) | |
697 | { | |
698 | SUB_DISTANCE (subscript) = chrec_dont_know; | |
699 | return; | |
700 | } | |
701 | else | |
702 | conflicts_b = TREE_VEC_ELT (conflicts_b, 0); | |
703 | } | |
704 | ||
705 | difference = chrec_fold_minus | |
56cf8686 SP |
706 | (integer_type_node, conflicts_b, conflicts_a); |
707 | ||
708 | if (evolution_function_is_constant_p (difference)) | |
709 | SUB_DISTANCE (subscript) = difference; | |
710 | ||
711 | else | |
712 | SUB_DISTANCE (subscript) = chrec_dont_know; | |
713 | } | |
714 | } | |
715 | } | |
716 | ||
717 | /* Initialize a ddr. */ | |
718 | ||
719 | struct data_dependence_relation * | |
720 | initialize_data_dependence_relation (struct data_reference *a, | |
721 | struct data_reference *b) | |
722 | { | |
723 | struct data_dependence_relation *res; | |
79fe1b3b | 724 | bool differ_p; |
56cf8686 | 725 | |
36d59cf7 | 726 | res = xmalloc (sizeof (struct data_dependence_relation)); |
56cf8686 SP |
727 | DDR_A (res) = a; |
728 | DDR_B (res) = b; | |
729 | ||
730 | if (a == NULL || b == NULL | |
731 | || DR_BASE_NAME (a) == NULL_TREE | |
732 | || DR_BASE_NAME (b) == NULL_TREE) | |
733 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; | |
734 | ||
735 | /* When the dimensions of A and B differ, we directly initialize | |
736 | the relation to "there is no dependence": chrec_known. */ | |
737 | else if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b) | |
79fe1b3b | 738 | || (array_base_name_differ_p (a, b, &differ_p) && differ_p)) |
56cf8686 SP |
739 | DDR_ARE_DEPENDENT (res) = chrec_known; |
740 | ||
741 | else | |
742 | { | |
743 | unsigned int i; | |
86df10e3 | 744 | DDR_AFFINE_P (res) = true; |
56cf8686 SP |
745 | DDR_ARE_DEPENDENT (res) = NULL_TREE; |
746 | DDR_SUBSCRIPTS_VECTOR_INIT (res, DR_NUM_DIMENSIONS (a)); | |
86df10e3 SP |
747 | DDR_SIZE_VECT (res) = 0; |
748 | DDR_DIST_VECT (res) = NULL; | |
749 | DDR_DIR_VECT (res) = NULL; | |
56cf8686 SP |
750 | |
751 | for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) | |
752 | { | |
753 | struct subscript *subscript; | |
754 | ||
36d59cf7 | 755 | subscript = xmalloc (sizeof (struct subscript)); |
56cf8686 SP |
756 | SUB_CONFLICTS_IN_A (subscript) = chrec_dont_know; |
757 | SUB_CONFLICTS_IN_B (subscript) = chrec_dont_know; | |
86df10e3 | 758 | SUB_LAST_CONFLICT (subscript) = chrec_dont_know; |
56cf8686 | 759 | SUB_DISTANCE (subscript) = chrec_dont_know; |
56cf8686 SP |
760 | VARRAY_PUSH_GENERIC_PTR (DDR_SUBSCRIPTS (res), subscript); |
761 | } | |
762 | } | |
763 | ||
764 | return res; | |
765 | } | |
766 | ||
767 | /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap | |
768 | description. */ | |
769 | ||
770 | static inline void | |
771 | finalize_ddr_dependent (struct data_dependence_relation *ddr, | |
772 | tree chrec) | |
773 | { | |
36d59cf7 DB |
774 | if (dump_file && (dump_flags & TDF_DETAILS)) |
775 | { | |
776 | fprintf (dump_file, "(dependence classified: "); | |
777 | print_generic_expr (dump_file, chrec, 0); | |
778 | fprintf (dump_file, ")\n"); | |
779 | } | |
780 | ||
56cf8686 SP |
781 | DDR_ARE_DEPENDENT (ddr) = chrec; |
782 | varray_clear (DDR_SUBSCRIPTS (ddr)); | |
783 | } | |
784 | ||
86df10e3 SP |
785 | /* The dependence relation DDR cannot be represented by a distance |
786 | vector. */ | |
787 | ||
788 | static inline void | |
789 | non_affine_dependence_relation (struct data_dependence_relation *ddr) | |
790 | { | |
791 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
792 | fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); | |
793 | ||
794 | DDR_AFFINE_P (ddr) = false; | |
795 | } | |
796 | ||
56cf8686 SP |
797 | \f |
798 | ||
799 | /* This section contains the classic Banerjee tests. */ | |
800 | ||
801 | /* Returns true iff CHREC_A and CHREC_B are not dependent on any index | |
802 | variables, i.e., if the ZIV (Zero Index Variable) test is true. */ | |
803 | ||
804 | static inline bool | |
805 | ziv_subscript_p (tree chrec_a, | |
806 | tree chrec_b) | |
807 | { | |
808 | return (evolution_function_is_constant_p (chrec_a) | |
809 | && evolution_function_is_constant_p (chrec_b)); | |
810 | } | |
811 | ||
812 | /* Returns true iff CHREC_A and CHREC_B are dependent on an index | |
813 | variable, i.e., if the SIV (Single Index Variable) test is true. */ | |
814 | ||
815 | static bool | |
816 | siv_subscript_p (tree chrec_a, | |
817 | tree chrec_b) | |
818 | { | |
819 | if ((evolution_function_is_constant_p (chrec_a) | |
820 | && evolution_function_is_univariate_p (chrec_b)) | |
821 | || (evolution_function_is_constant_p (chrec_b) | |
822 | && evolution_function_is_univariate_p (chrec_a))) | |
823 | return true; | |
824 | ||
825 | if (evolution_function_is_univariate_p (chrec_a) | |
826 | && evolution_function_is_univariate_p (chrec_b)) | |
827 | { | |
828 | switch (TREE_CODE (chrec_a)) | |
829 | { | |
830 | case POLYNOMIAL_CHREC: | |
831 | switch (TREE_CODE (chrec_b)) | |
832 | { | |
833 | case POLYNOMIAL_CHREC: | |
834 | if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) | |
835 | return false; | |
836 | ||
837 | default: | |
838 | return true; | |
839 | } | |
840 | ||
841 | default: | |
842 | return true; | |
843 | } | |
844 | } | |
845 | ||
846 | return false; | |
847 | } | |
848 | ||
849 | /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and | |
850 | *OVERLAPS_B are initialized to the functions that describe the | |
851 | relation between the elements accessed twice by CHREC_A and | |
852 | CHREC_B. For k >= 0, the following property is verified: | |
853 | ||
854 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ | |
855 | ||
856 | static void | |
857 | analyze_ziv_subscript (tree chrec_a, | |
858 | tree chrec_b, | |
859 | tree *overlaps_a, | |
86df10e3 SP |
860 | tree *overlaps_b, |
861 | tree *last_conflicts) | |
56cf8686 SP |
862 | { |
863 | tree difference; | |
864 | ||
865 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
866 | fprintf (dump_file, "(analyze_ziv_subscript \n"); | |
867 | ||
868 | difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b); | |
869 | ||
870 | switch (TREE_CODE (difference)) | |
871 | { | |
872 | case INTEGER_CST: | |
873 | if (integer_zerop (difference)) | |
874 | { | |
875 | /* The difference is equal to zero: the accessed index | |
876 | overlaps for each iteration in the loop. */ | |
877 | *overlaps_a = integer_zero_node; | |
878 | *overlaps_b = integer_zero_node; | |
86df10e3 | 879 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
880 | } |
881 | else | |
882 | { | |
883 | /* The accesses do not overlap. */ | |
884 | *overlaps_a = chrec_known; | |
86df10e3 SP |
885 | *overlaps_b = chrec_known; |
886 | *last_conflicts = integer_zero_node; | |
56cf8686 SP |
887 | } |
888 | break; | |
889 | ||
890 | default: | |
891 | /* We're not sure whether the indexes overlap. For the moment, | |
892 | conservatively answer "don't know". */ | |
893 | *overlaps_a = chrec_dont_know; | |
86df10e3 SP |
894 | *overlaps_b = chrec_dont_know; |
895 | *last_conflicts = chrec_dont_know; | |
56cf8686 SP |
896 | break; |
897 | } | |
898 | ||
899 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
900 | fprintf (dump_file, ")\n"); | |
901 | } | |
902 | ||
903 | /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a | |
904 | constant, and CHREC_B is an affine function. *OVERLAPS_A and | |
905 | *OVERLAPS_B are initialized to the functions that describe the | |
906 | relation between the elements accessed twice by CHREC_A and | |
907 | CHREC_B. For k >= 0, the following property is verified: | |
908 | ||
909 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ | |
910 | ||
911 | static void | |
912 | analyze_siv_subscript_cst_affine (tree chrec_a, | |
913 | tree chrec_b, | |
914 | tree *overlaps_a, | |
86df10e3 SP |
915 | tree *overlaps_b, |
916 | tree *last_conflicts) | |
56cf8686 SP |
917 | { |
918 | bool value0, value1, value2; | |
919 | tree difference = chrec_fold_minus | |
920 | (integer_type_node, CHREC_LEFT (chrec_b), chrec_a); | |
921 | ||
922 | if (!chrec_is_positive (initial_condition (difference), &value0)) | |
923 | { | |
924 | *overlaps_a = chrec_dont_know; | |
925 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 926 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
927 | return; |
928 | } | |
929 | else | |
930 | { | |
931 | if (value0 == false) | |
932 | { | |
933 | if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) | |
934 | { | |
935 | *overlaps_a = chrec_dont_know; | |
936 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 937 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
938 | return; |
939 | } | |
940 | else | |
941 | { | |
942 | if (value1 == true) | |
943 | { | |
944 | /* Example: | |
945 | chrec_a = 12 | |
946 | chrec_b = {10, +, 1} | |
947 | */ | |
948 | ||
949 | if (tree_fold_divides_p | |
950 | (integer_type_node, CHREC_RIGHT (chrec_b), difference)) | |
951 | { | |
952 | *overlaps_a = integer_zero_node; | |
953 | *overlaps_b = fold | |
954 | (build (EXACT_DIV_EXPR, integer_type_node, | |
955 | fold (build1 (ABS_EXPR, integer_type_node, difference)), | |
956 | CHREC_RIGHT (chrec_b))); | |
86df10e3 | 957 | *last_conflicts = integer_one_node; |
56cf8686 SP |
958 | return; |
959 | } | |
960 | ||
961 | /* When the step does not divides the difference, there are | |
962 | no overlaps. */ | |
963 | else | |
964 | { | |
965 | *overlaps_a = chrec_known; | |
966 | *overlaps_b = chrec_known; | |
86df10e3 | 967 | *last_conflicts = integer_zero_node; |
56cf8686 SP |
968 | return; |
969 | } | |
970 | } | |
971 | ||
972 | else | |
973 | { | |
974 | /* Example: | |
975 | chrec_a = 12 | |
976 | chrec_b = {10, +, -1} | |
977 | ||
978 | In this case, chrec_a will not overlap with chrec_b. */ | |
979 | *overlaps_a = chrec_known; | |
980 | *overlaps_b = chrec_known; | |
86df10e3 | 981 | *last_conflicts = integer_zero_node; |
56cf8686 SP |
982 | return; |
983 | } | |
984 | } | |
985 | } | |
986 | else | |
987 | { | |
988 | if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) | |
989 | { | |
990 | *overlaps_a = chrec_dont_know; | |
991 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 992 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
993 | return; |
994 | } | |
995 | else | |
996 | { | |
997 | if (value2 == false) | |
998 | { | |
999 | /* Example: | |
1000 | chrec_a = 3 | |
1001 | chrec_b = {10, +, -1} | |
1002 | */ | |
1003 | if (tree_fold_divides_p | |
1004 | (integer_type_node, CHREC_RIGHT (chrec_b), difference)) | |
1005 | { | |
1006 | *overlaps_a = integer_zero_node; | |
1007 | *overlaps_b = fold | |
1008 | (build (EXACT_DIV_EXPR, integer_type_node, difference, | |
1009 | CHREC_RIGHT (chrec_b))); | |
86df10e3 | 1010 | *last_conflicts = integer_one_node; |
56cf8686 SP |
1011 | return; |
1012 | } | |
1013 | ||
1014 | /* When the step does not divides the difference, there | |
1015 | are no overlaps. */ | |
1016 | else | |
1017 | { | |
1018 | *overlaps_a = chrec_known; | |
1019 | *overlaps_b = chrec_known; | |
86df10e3 | 1020 | *last_conflicts = integer_zero_node; |
56cf8686 SP |
1021 | return; |
1022 | } | |
1023 | } | |
1024 | else | |
1025 | { | |
1026 | /* Example: | |
1027 | chrec_a = 3 | |
1028 | chrec_b = {4, +, 1} | |
1029 | ||
1030 | In this case, chrec_a will not overlap with chrec_b. */ | |
1031 | *overlaps_a = chrec_known; | |
1032 | *overlaps_b = chrec_known; | |
86df10e3 | 1033 | *last_conflicts = integer_zero_node; |
56cf8686 SP |
1034 | return; |
1035 | } | |
1036 | } | |
1037 | } | |
1038 | } | |
1039 | } | |
1040 | ||
50300b4c | 1041 | /* Helper recursive function for initializing the matrix A. Returns |
86df10e3 | 1042 | the initial value of CHREC. */ |
56cf8686 | 1043 | |
86df10e3 SP |
1044 | static int |
1045 | initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) | |
1046 | { | |
1047 | gcc_assert (chrec); | |
1048 | ||
1049 | if (TREE_CODE (chrec) != POLYNOMIAL_CHREC) | |
1050 | return int_cst_value (chrec); | |
1051 | ||
1052 | A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); | |
1053 | return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); | |
1054 | } | |
1055 | ||
1056 | #define FLOOR_DIV(x,y) ((x) / (y)) | |
1057 | ||
1058 | /* Solves the special case of the Diophantine equation: | |
1059 | | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) | |
1060 | ||
1061 | Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the | |
1062 | number of iterations that loops X and Y run. The overlaps will be | |
1063 | constructed as evolutions in dimension DIM. */ | |
56cf8686 SP |
1064 | |
1065 | static void | |
86df10e3 SP |
1066 | compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b, |
1067 | tree *overlaps_a, tree *overlaps_b, | |
1068 | tree *last_conflicts, int dim) | |
1069 | { | |
1070 | if (((step_a > 0 && step_b > 0) | |
1071 | || (step_a < 0 && step_b < 0))) | |
1072 | { | |
1073 | int step_overlaps_a, step_overlaps_b; | |
1074 | int gcd_steps_a_b, last_conflict, tau2; | |
1075 | ||
1076 | gcd_steps_a_b = gcd (step_a, step_b); | |
1077 | step_overlaps_a = step_b / gcd_steps_a_b; | |
1078 | step_overlaps_b = step_a / gcd_steps_a_b; | |
1079 | ||
1080 | tau2 = FLOOR_DIV (niter, step_overlaps_a); | |
1081 | tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); | |
1082 | last_conflict = tau2; | |
1083 | ||
1084 | *overlaps_a = build_polynomial_chrec | |
1085 | (dim, integer_zero_node, | |
1086 | build_int_cst (NULL_TREE, step_overlaps_a)); | |
1087 | *overlaps_b = build_polynomial_chrec | |
1088 | (dim, integer_zero_node, | |
1089 | build_int_cst (NULL_TREE, step_overlaps_b)); | |
1090 | *last_conflicts = build_int_cst (NULL_TREE, last_conflict); | |
1091 | } | |
1092 | ||
1093 | else | |
1094 | { | |
1095 | *overlaps_a = integer_zero_node; | |
1096 | *overlaps_b = integer_zero_node; | |
1097 | *last_conflicts = integer_zero_node; | |
1098 | } | |
1099 | } | |
1100 | ||
1101 | ||
1102 | /* Solves the special case of a Diophantine equation where CHREC_A is | |
1103 | an affine bivariate function, and CHREC_B is an affine univariate | |
1104 | function. For example, | |
1105 | ||
1106 | | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z | |
1107 | ||
1108 | has the following overlapping functions: | |
1109 | ||
1110 | | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v | |
1111 | | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v | |
1112 | | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v | |
1113 | ||
35fd3193 | 1114 | FORNOW: This is a specialized implementation for a case occurring in |
86df10e3 SP |
1115 | a common benchmark. Implement the general algorithm. */ |
1116 | ||
1117 | static void | |
1118 | compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, | |
1119 | tree *overlaps_a, tree *overlaps_b, | |
1120 | tree *last_conflicts) | |
56cf8686 | 1121 | { |
86df10e3 SP |
1122 | bool xz_p, yz_p, xyz_p; |
1123 | int step_x, step_y, step_z; | |
1124 | int niter_x, niter_y, niter_z, niter; | |
1125 | tree numiter_x, numiter_y, numiter_z; | |
1126 | tree overlaps_a_xz, overlaps_b_xz, last_conflicts_xz; | |
1127 | tree overlaps_a_yz, overlaps_b_yz, last_conflicts_yz; | |
1128 | tree overlaps_a_xyz, overlaps_b_xyz, last_conflicts_xyz; | |
1129 | ||
1130 | step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); | |
1131 | step_y = int_cst_value (CHREC_RIGHT (chrec_a)); | |
1132 | step_z = int_cst_value (CHREC_RIGHT (chrec_b)); | |
1133 | ||
1134 | numiter_x = number_of_iterations_in_loop | |
1135 | (current_loops->parray[CHREC_VARIABLE (CHREC_LEFT (chrec_a))]); | |
1136 | numiter_y = number_of_iterations_in_loop | |
1137 | (current_loops->parray[CHREC_VARIABLE (chrec_a)]); | |
1138 | numiter_z = number_of_iterations_in_loop | |
1139 | (current_loops->parray[CHREC_VARIABLE (chrec_b)]); | |
1140 | ||
1141 | if (TREE_CODE (numiter_x) != INTEGER_CST) | |
1142 | numiter_x = current_loops->parray[CHREC_VARIABLE (CHREC_LEFT (chrec_a))] | |
1143 | ->estimated_nb_iterations; | |
1144 | if (TREE_CODE (numiter_y) != INTEGER_CST) | |
1145 | numiter_y = current_loops->parray[CHREC_VARIABLE (chrec_a)] | |
1146 | ->estimated_nb_iterations; | |
1147 | if (TREE_CODE (numiter_z) != INTEGER_CST) | |
1148 | numiter_z = current_loops->parray[CHREC_VARIABLE (chrec_b)] | |
1149 | ->estimated_nb_iterations; | |
1150 | ||
1151 | if (numiter_x == NULL_TREE || numiter_y == NULL_TREE | |
1152 | || numiter_z == NULL_TREE) | |
1153 | { | |
1154 | *overlaps_a = chrec_dont_know; | |
1155 | *overlaps_b = chrec_dont_know; | |
1156 | *last_conflicts = chrec_dont_know; | |
1157 | return; | |
1158 | } | |
1159 | ||
1160 | niter_x = int_cst_value (numiter_x); | |
1161 | niter_y = int_cst_value (numiter_y); | |
1162 | niter_z = int_cst_value (numiter_z); | |
1163 | ||
1164 | niter = MIN (niter_x, niter_z); | |
1165 | compute_overlap_steps_for_affine_univar (niter, step_x, step_z, | |
1166 | &overlaps_a_xz, | |
1167 | &overlaps_b_xz, | |
1168 | &last_conflicts_xz, 1); | |
1169 | niter = MIN (niter_y, niter_z); | |
1170 | compute_overlap_steps_for_affine_univar (niter, step_y, step_z, | |
1171 | &overlaps_a_yz, | |
1172 | &overlaps_b_yz, | |
1173 | &last_conflicts_yz, 2); | |
1174 | niter = MIN (niter_x, niter_z); | |
1175 | niter = MIN (niter_y, niter); | |
1176 | compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, | |
1177 | &overlaps_a_xyz, | |
1178 | &overlaps_b_xyz, | |
1179 | &last_conflicts_xyz, 3); | |
1180 | ||
1181 | xz_p = !integer_zerop (last_conflicts_xz); | |
1182 | yz_p = !integer_zerop (last_conflicts_yz); | |
1183 | xyz_p = !integer_zerop (last_conflicts_xyz); | |
1184 | ||
1185 | if (xz_p || yz_p || xyz_p) | |
1186 | { | |
1187 | *overlaps_a = make_tree_vec (2); | |
1188 | TREE_VEC_ELT (*overlaps_a, 0) = integer_zero_node; | |
1189 | TREE_VEC_ELT (*overlaps_a, 1) = integer_zero_node; | |
1190 | *overlaps_b = integer_zero_node; | |
1191 | if (xz_p) | |
1192 | { | |
1193 | TREE_VEC_ELT (*overlaps_a, 0) = | |
1194 | chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 0), | |
1195 | overlaps_a_xz); | |
1196 | *overlaps_b = | |
1197 | chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_xz); | |
1198 | *last_conflicts = last_conflicts_xz; | |
1199 | } | |
1200 | if (yz_p) | |
1201 | { | |
1202 | TREE_VEC_ELT (*overlaps_a, 1) = | |
1203 | chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 1), | |
1204 | overlaps_a_yz); | |
1205 | *overlaps_b = | |
1206 | chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_yz); | |
1207 | *last_conflicts = last_conflicts_yz; | |
1208 | } | |
1209 | if (xyz_p) | |
1210 | { | |
1211 | TREE_VEC_ELT (*overlaps_a, 0) = | |
1212 | chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 0), | |
1213 | overlaps_a_xyz); | |
1214 | TREE_VEC_ELT (*overlaps_a, 1) = | |
1215 | chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 1), | |
1216 | overlaps_a_xyz); | |
1217 | *overlaps_b = | |
1218 | chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_xyz); | |
1219 | *last_conflicts = last_conflicts_xyz; | |
1220 | } | |
1221 | } | |
1222 | else | |
1223 | { | |
1224 | *overlaps_a = integer_zero_node; | |
1225 | *overlaps_b = integer_zero_node; | |
1226 | *last_conflicts = integer_zero_node; | |
1227 | } | |
56cf8686 SP |
1228 | } |
1229 | ||
1230 | /* Determines the overlapping elements due to accesses CHREC_A and | |
1231 | CHREC_B, that are affine functions. This is a part of the | |
1232 | subscript analyzer. */ | |
1233 | ||
1234 | static void | |
1235 | analyze_subscript_affine_affine (tree chrec_a, | |
1236 | tree chrec_b, | |
1237 | tree *overlaps_a, | |
86df10e3 SP |
1238 | tree *overlaps_b, |
1239 | tree *last_conflicts) | |
56cf8686 | 1240 | { |
86df10e3 SP |
1241 | unsigned nb_vars_a, nb_vars_b, dim; |
1242 | int init_a, init_b, gamma, gcd_alpha_beta; | |
1243 | int tau1, tau2; | |
1244 | lambda_matrix A, U, S; | |
1245 | ||
56cf8686 SP |
1246 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1247 | fprintf (dump_file, "(analyze_subscript_affine_affine \n"); | |
1248 | ||
1249 | /* For determining the initial intersection, we have to solve a | |
1250 | Diophantine equation. This is the most time consuming part. | |
1251 | ||
1252 | For answering to the question: "Is there a dependence?" we have | |
1253 | to prove that there exists a solution to the Diophantine | |
1254 | equation, and that the solution is in the iteration domain, | |
89dbed81 | 1255 | i.e. the solution is positive or zero, and that the solution |
56cf8686 SP |
1256 | happens before the upper bound loop.nb_iterations. Otherwise |
1257 | there is no dependence. This function outputs a description of | |
1258 | the iterations that hold the intersections. */ | |
1259 | ||
56cf8686 | 1260 | |
86df10e3 SP |
1261 | nb_vars_a = nb_vars_in_chrec (chrec_a); |
1262 | nb_vars_b = nb_vars_in_chrec (chrec_b); | |
1263 | ||
1264 | dim = nb_vars_a + nb_vars_b; | |
1265 | U = lambda_matrix_new (dim, dim); | |
1266 | A = lambda_matrix_new (dim, 1); | |
1267 | S = lambda_matrix_new (dim, 1); | |
1268 | ||
1269 | init_a = initialize_matrix_A (A, chrec_a, 0, 1); | |
1270 | init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1); | |
1271 | gamma = init_b - init_a; | |
1272 | ||
1273 | /* Don't do all the hard work of solving the Diophantine equation | |
1274 | when we already know the solution: for example, | |
1275 | | {3, +, 1}_1 | |
1276 | | {3, +, 4}_2 | |
1277 | | gamma = 3 - 3 = 0. | |
1278 | Then the first overlap occurs during the first iterations: | |
1279 | | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x) | |
1280 | */ | |
1281 | if (gamma == 0) | |
56cf8686 | 1282 | { |
86df10e3 | 1283 | if (nb_vars_a == 1 && nb_vars_b == 1) |
56cf8686 | 1284 | { |
86df10e3 SP |
1285 | int step_a, step_b; |
1286 | int niter, niter_a, niter_b; | |
1287 | tree numiter_a, numiter_b; | |
1288 | ||
1289 | numiter_a = number_of_iterations_in_loop | |
1290 | (current_loops->parray[CHREC_VARIABLE (chrec_a)]); | |
1291 | numiter_b = number_of_iterations_in_loop | |
1292 | (current_loops->parray[CHREC_VARIABLE (chrec_b)]); | |
1293 | ||
1294 | if (TREE_CODE (numiter_a) != INTEGER_CST) | |
1295 | numiter_a = current_loops->parray[CHREC_VARIABLE (chrec_a)] | |
1296 | ->estimated_nb_iterations; | |
1297 | if (TREE_CODE (numiter_b) != INTEGER_CST) | |
1298 | numiter_b = current_loops->parray[CHREC_VARIABLE (chrec_b)] | |
1299 | ->estimated_nb_iterations; | |
1300 | if (numiter_a == NULL_TREE || numiter_b == NULL_TREE) | |
1301 | { | |
1302 | *overlaps_a = chrec_dont_know; | |
1303 | *overlaps_b = chrec_dont_know; | |
1304 | *last_conflicts = chrec_dont_know; | |
1305 | return; | |
1306 | } | |
1307 | ||
1308 | niter_a = int_cst_value (numiter_a); | |
1309 | niter_b = int_cst_value (numiter_b); | |
1310 | niter = MIN (niter_a, niter_b); | |
1311 | ||
1312 | step_a = int_cst_value (CHREC_RIGHT (chrec_a)); | |
1313 | step_b = int_cst_value (CHREC_RIGHT (chrec_b)); | |
1314 | ||
1315 | compute_overlap_steps_for_affine_univar (niter, step_a, step_b, | |
1316 | overlaps_a, overlaps_b, | |
1317 | last_conflicts, 1); | |
56cf8686 | 1318 | } |
86df10e3 SP |
1319 | |
1320 | else if (nb_vars_a == 2 && nb_vars_b == 1) | |
1321 | compute_overlap_steps_for_affine_1_2 | |
1322 | (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts); | |
1323 | ||
1324 | else if (nb_vars_a == 1 && nb_vars_b == 2) | |
1325 | compute_overlap_steps_for_affine_1_2 | |
1326 | (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts); | |
1327 | ||
1328 | else | |
56cf8686 | 1329 | { |
86df10e3 SP |
1330 | *overlaps_a = chrec_dont_know; |
1331 | *overlaps_b = chrec_dont_know; | |
1332 | *last_conflicts = chrec_dont_know; | |
56cf8686 | 1333 | } |
86df10e3 SP |
1334 | return; |
1335 | } | |
1336 | ||
1337 | /* U.A = S */ | |
1338 | lambda_matrix_right_hermite (A, dim, 1, S, U); | |
1339 | ||
1340 | if (S[0][0] < 0) | |
1341 | { | |
1342 | S[0][0] *= -1; | |
1343 | lambda_matrix_row_negate (U, dim, 0); | |
1344 | } | |
1345 | gcd_alpha_beta = S[0][0]; | |
1346 | ||
1347 | /* The classic "gcd-test". */ | |
1348 | if (!int_divides_p (gcd_alpha_beta, gamma)) | |
1349 | { | |
1350 | /* The "gcd-test" has determined that there is no integer | |
1351 | solution, i.e. there is no dependence. */ | |
1352 | *overlaps_a = chrec_known; | |
1353 | *overlaps_b = chrec_known; | |
1354 | *last_conflicts = integer_zero_node; | |
1355 | } | |
1356 | ||
1357 | /* Both access functions are univariate. This includes SIV and MIV cases. */ | |
1358 | else if (nb_vars_a == 1 && nb_vars_b == 1) | |
1359 | { | |
1360 | /* Both functions should have the same evolution sign. */ | |
1361 | if (((A[0][0] > 0 && -A[1][0] > 0) | |
1362 | || (A[0][0] < 0 && -A[1][0] < 0))) | |
56cf8686 SP |
1363 | { |
1364 | /* The solutions are given by: | |
1365 | | | |
86df10e3 SP |
1366 | | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0] |
1367 | | [u21 u22] [y0] | |
1368 | ||
56cf8686 | 1369 | For a given integer t. Using the following variables, |
86df10e3 | 1370 | |
56cf8686 SP |
1371 | | i0 = u11 * gamma / gcd_alpha_beta |
1372 | | j0 = u12 * gamma / gcd_alpha_beta | |
1373 | | i1 = u21 | |
1374 | | j1 = u22 | |
86df10e3 | 1375 | |
56cf8686 | 1376 | the solutions are: |
86df10e3 SP |
1377 | |
1378 | | x0 = i0 + i1 * t, | |
1379 | | y0 = j0 + j1 * t. */ | |
1380 | ||
1381 | int i0, j0, i1, j1; | |
1382 | ||
56cf8686 SP |
1383 | /* X0 and Y0 are the first iterations for which there is a |
1384 | dependence. X0, Y0 are two solutions of the Diophantine | |
1385 | equation: chrec_a (X0) = chrec_b (Y0). */ | |
86df10e3 SP |
1386 | int x0, y0; |
1387 | int niter, niter_a, niter_b; | |
1388 | tree numiter_a, numiter_b; | |
1389 | ||
1390 | numiter_a = number_of_iterations_in_loop | |
1391 | (current_loops->parray[CHREC_VARIABLE (chrec_a)]); | |
1392 | numiter_b = number_of_iterations_in_loop | |
1393 | (current_loops->parray[CHREC_VARIABLE (chrec_b)]); | |
1394 | ||
1395 | if (TREE_CODE (numiter_a) != INTEGER_CST) | |
1396 | numiter_a = current_loops->parray[CHREC_VARIABLE (chrec_a)] | |
1397 | ->estimated_nb_iterations; | |
1398 | if (TREE_CODE (numiter_b) != INTEGER_CST) | |
1399 | numiter_b = current_loops->parray[CHREC_VARIABLE (chrec_b)] | |
1400 | ->estimated_nb_iterations; | |
1401 | if (numiter_a == NULL_TREE || numiter_b == NULL_TREE) | |
1402 | { | |
1403 | *overlaps_a = chrec_dont_know; | |
1404 | *overlaps_b = chrec_dont_know; | |
1405 | *last_conflicts = chrec_dont_know; | |
1406 | return; | |
1407 | } | |
1408 | ||
1409 | niter_a = int_cst_value (numiter_a); | |
1410 | niter_b = int_cst_value (numiter_b); | |
1411 | niter = MIN (niter_a, niter_b); | |
1412 | ||
1413 | i0 = U[0][0] * gamma / gcd_alpha_beta; | |
1414 | j0 = U[0][1] * gamma / gcd_alpha_beta; | |
1415 | i1 = U[1][0]; | |
1416 | j1 = U[1][1]; | |
1417 | ||
1418 | if ((i1 == 0 && i0 < 0) | |
1419 | || (j1 == 0 && j0 < 0)) | |
56cf8686 SP |
1420 | { |
1421 | /* There is no solution. | |
1422 | FIXME: The case "i0 > nb_iterations, j0 > nb_iterations" | |
1423 | falls in here, but for the moment we don't look at the | |
1424 | upper bound of the iteration domain. */ | |
1425 | *overlaps_a = chrec_known; | |
1426 | *overlaps_b = chrec_known; | |
86df10e3 SP |
1427 | *last_conflicts = integer_zero_node; |
1428 | } | |
1429 | ||
56cf8686 SP |
1430 | else |
1431 | { | |
86df10e3 | 1432 | if (i1 > 0) |
56cf8686 | 1433 | { |
86df10e3 SP |
1434 | tau1 = CEIL (-i0, i1); |
1435 | tau2 = FLOOR_DIV (niter - i0, i1); | |
1436 | ||
1437 | if (j1 > 0) | |
56cf8686 | 1438 | { |
9ba64a4a | 1439 | int last_conflict, min_multiple; |
86df10e3 SP |
1440 | tau1 = MAX (tau1, CEIL (-j0, j1)); |
1441 | tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1)); | |
1442 | ||
9ba64a4a SP |
1443 | x0 = i1 * tau1 + i0; |
1444 | y0 = j1 * tau1 + j0; | |
1445 | ||
1446 | /* At this point (x0, y0) is one of the | |
1447 | solutions to the Diophantine equation. The | |
1448 | next step has to compute the smallest | |
1449 | positive solution: the first conflicts. */ | |
1450 | min_multiple = MIN (x0 / i1, y0 / j1); | |
1451 | x0 -= i1 * min_multiple; | |
1452 | y0 -= j1 * min_multiple; | |
1453 | ||
86df10e3 SP |
1454 | tau1 = (x0 - i0)/i1; |
1455 | last_conflict = tau2 - tau1; | |
1456 | ||
1457 | *overlaps_a = build_polynomial_chrec | |
1458 | (1, | |
1459 | build_int_cst (NULL_TREE, x0), | |
1460 | build_int_cst (NULL_TREE, i1)); | |
1461 | *overlaps_b = build_polynomial_chrec | |
1462 | (1, | |
1463 | build_int_cst (NULL_TREE, y0), | |
1464 | build_int_cst (NULL_TREE, j1)); | |
1465 | *last_conflicts = build_int_cst (NULL_TREE, last_conflict); | |
56cf8686 SP |
1466 | } |
1467 | else | |
1468 | { | |
1469 | /* FIXME: For the moment, the upper bound of the | |
471854f8 | 1470 | iteration domain for j is not checked. */ |
56cf8686 SP |
1471 | *overlaps_a = chrec_dont_know; |
1472 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 1473 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
1474 | } |
1475 | } | |
86df10e3 | 1476 | |
56cf8686 SP |
1477 | else |
1478 | { | |
1479 | /* FIXME: For the moment, the upper bound of the | |
471854f8 | 1480 | iteration domain for i is not checked. */ |
56cf8686 SP |
1481 | *overlaps_a = chrec_dont_know; |
1482 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 1483 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
1484 | } |
1485 | } | |
1486 | } | |
86df10e3 SP |
1487 | else |
1488 | { | |
1489 | *overlaps_a = chrec_dont_know; | |
1490 | *overlaps_b = chrec_dont_know; | |
1491 | *last_conflicts = chrec_dont_know; | |
1492 | } | |
56cf8686 | 1493 | } |
86df10e3 | 1494 | |
56cf8686 SP |
1495 | else |
1496 | { | |
56cf8686 SP |
1497 | *overlaps_a = chrec_dont_know; |
1498 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 1499 | *last_conflicts = chrec_dont_know; |
56cf8686 | 1500 | } |
86df10e3 SP |
1501 | |
1502 | ||
56cf8686 SP |
1503 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1504 | { | |
1505 | fprintf (dump_file, " (overlaps_a = "); | |
1506 | print_generic_expr (dump_file, *overlaps_a, 0); | |
1507 | fprintf (dump_file, ")\n (overlaps_b = "); | |
1508 | print_generic_expr (dump_file, *overlaps_b, 0); | |
1509 | fprintf (dump_file, ")\n"); | |
1510 | } | |
1511 | ||
1512 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1513 | fprintf (dump_file, ")\n"); | |
1514 | } | |
1515 | ||
1516 | /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and | |
1517 | *OVERLAPS_B are initialized to the functions that describe the | |
1518 | relation between the elements accessed twice by CHREC_A and | |
1519 | CHREC_B. For k >= 0, the following property is verified: | |
1520 | ||
1521 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ | |
1522 | ||
1523 | static void | |
1524 | analyze_siv_subscript (tree chrec_a, | |
1525 | tree chrec_b, | |
1526 | tree *overlaps_a, | |
86df10e3 SP |
1527 | tree *overlaps_b, |
1528 | tree *last_conflicts) | |
56cf8686 SP |
1529 | { |
1530 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1531 | fprintf (dump_file, "(analyze_siv_subscript \n"); | |
1532 | ||
1533 | if (evolution_function_is_constant_p (chrec_a) | |
1534 | && evolution_function_is_affine_p (chrec_b)) | |
1535 | analyze_siv_subscript_cst_affine (chrec_a, chrec_b, | |
86df10e3 | 1536 | overlaps_a, overlaps_b, last_conflicts); |
56cf8686 SP |
1537 | |
1538 | else if (evolution_function_is_affine_p (chrec_a) | |
1539 | && evolution_function_is_constant_p (chrec_b)) | |
86df10e3 SP |
1540 | analyze_siv_subscript_cst_affine (chrec_b, chrec_a, |
1541 | overlaps_b, overlaps_a, last_conflicts); | |
56cf8686 SP |
1542 | |
1543 | else if (evolution_function_is_affine_p (chrec_a) | |
86df10e3 | 1544 | && evolution_function_is_affine_p (chrec_b)) |
56cf8686 | 1545 | analyze_subscript_affine_affine (chrec_a, chrec_b, |
86df10e3 | 1546 | overlaps_a, overlaps_b, last_conflicts); |
56cf8686 SP |
1547 | else |
1548 | { | |
1549 | *overlaps_a = chrec_dont_know; | |
1550 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 1551 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
1552 | } |
1553 | ||
1554 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1555 | fprintf (dump_file, ")\n"); | |
1556 | } | |
1557 | ||
1558 | /* Return true when the evolution steps of an affine CHREC divide the | |
1559 | constant CST. */ | |
1560 | ||
1561 | static bool | |
1562 | chrec_steps_divide_constant_p (tree chrec, | |
1563 | tree cst) | |
1564 | { | |
1565 | switch (TREE_CODE (chrec)) | |
1566 | { | |
1567 | case POLYNOMIAL_CHREC: | |
1568 | return (tree_fold_divides_p (integer_type_node, CHREC_RIGHT (chrec), cst) | |
1569 | && chrec_steps_divide_constant_p (CHREC_LEFT (chrec), cst)); | |
1570 | ||
1571 | default: | |
1572 | /* On the initial condition, return true. */ | |
1573 | return true; | |
1574 | } | |
1575 | } | |
1576 | ||
1577 | /* Analyze a MIV (Multiple Index Variable) subscript. *OVERLAPS_A and | |
1578 | *OVERLAPS_B are initialized to the functions that describe the | |
1579 | relation between the elements accessed twice by CHREC_A and | |
1580 | CHREC_B. For k >= 0, the following property is verified: | |
1581 | ||
1582 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ | |
1583 | ||
1584 | static void | |
1585 | analyze_miv_subscript (tree chrec_a, | |
1586 | tree chrec_b, | |
1587 | tree *overlaps_a, | |
86df10e3 SP |
1588 | tree *overlaps_b, |
1589 | tree *last_conflicts) | |
56cf8686 SP |
1590 | { |
1591 | /* FIXME: This is a MIV subscript, not yet handled. | |
1592 | Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from | |
1593 | (A[i] vs. A[j]). | |
1594 | ||
1595 | In the SIV test we had to solve a Diophantine equation with two | |
1596 | variables. In the MIV case we have to solve a Diophantine | |
1597 | equation with 2*n variables (if the subscript uses n IVs). | |
1598 | */ | |
1599 | tree difference; | |
1600 | ||
1601 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1602 | fprintf (dump_file, "(analyze_miv_subscript \n"); | |
1603 | ||
1604 | difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b); | |
1605 | ||
1606 | if (chrec_zerop (difference)) | |
1607 | { | |
1608 | /* Access functions are the same: all the elements are accessed | |
1609 | in the same order. */ | |
1610 | *overlaps_a = integer_zero_node; | |
1611 | *overlaps_b = integer_zero_node; | |
86df10e3 SP |
1612 | *last_conflicts = number_of_iterations_in_loop |
1613 | (current_loops->parray[CHREC_VARIABLE (chrec_a)]); | |
56cf8686 SP |
1614 | } |
1615 | ||
1616 | else if (evolution_function_is_constant_p (difference) | |
1617 | /* For the moment, the following is verified: | |
1618 | evolution_function_is_affine_multivariate_p (chrec_a) */ | |
1619 | && !chrec_steps_divide_constant_p (chrec_a, difference)) | |
1620 | { | |
1621 | /* testsuite/.../ssa-chrec-33.c | |
1622 | {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 | |
1623 | ||
1624 | The difference is 1, and the evolution steps are equal to 2, | |
1625 | consequently there are no overlapping elements. */ | |
1626 | *overlaps_a = chrec_known; | |
1627 | *overlaps_b = chrec_known; | |
86df10e3 | 1628 | *last_conflicts = integer_zero_node; |
56cf8686 SP |
1629 | } |
1630 | ||
86df10e3 SP |
1631 | else if (evolution_function_is_affine_multivariate_p (chrec_a) |
1632 | && evolution_function_is_affine_multivariate_p (chrec_b)) | |
56cf8686 SP |
1633 | { |
1634 | /* testsuite/.../ssa-chrec-35.c | |
1635 | {0, +, 1}_2 vs. {0, +, 1}_3 | |
1636 | the overlapping elements are respectively located at iterations: | |
86df10e3 SP |
1637 | {0, +, 1}_x and {0, +, 1}_x, |
1638 | in other words, we have the equality: | |
1639 | {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) | |
1640 | ||
1641 | Other examples: | |
1642 | {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = | |
1643 | {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) | |
1644 | ||
1645 | {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = | |
1646 | {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) | |
56cf8686 | 1647 | */ |
86df10e3 SP |
1648 | analyze_subscript_affine_affine (chrec_a, chrec_b, |
1649 | overlaps_a, overlaps_b, last_conflicts); | |
56cf8686 SP |
1650 | } |
1651 | ||
1652 | else | |
1653 | { | |
1654 | /* When the analysis is too difficult, answer "don't know". */ | |
1655 | *overlaps_a = chrec_dont_know; | |
1656 | *overlaps_b = chrec_dont_know; | |
86df10e3 | 1657 | *last_conflicts = chrec_dont_know; |
56cf8686 SP |
1658 | } |
1659 | ||
1660 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1661 | fprintf (dump_file, ")\n"); | |
1662 | } | |
1663 | ||
1664 | /* Determines the iterations for which CHREC_A is equal to CHREC_B. | |
1665 | OVERLAP_ITERATIONS_A and OVERLAP_ITERATIONS_B are initialized with | |
1666 | two functions that describe the iterations that contain conflicting | |
1667 | elements. | |
1668 | ||
1669 | Remark: For an integer k >= 0, the following equality is true: | |
1670 | ||
1671 | CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). | |
1672 | */ | |
1673 | ||
1674 | static void | |
1675 | analyze_overlapping_iterations (tree chrec_a, | |
1676 | tree chrec_b, | |
1677 | tree *overlap_iterations_a, | |
86df10e3 SP |
1678 | tree *overlap_iterations_b, |
1679 | tree *last_conflicts) | |
56cf8686 SP |
1680 | { |
1681 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1682 | { | |
1683 | fprintf (dump_file, "(analyze_overlapping_iterations \n"); | |
1684 | fprintf (dump_file, " (chrec_a = "); | |
1685 | print_generic_expr (dump_file, chrec_a, 0); | |
1686 | fprintf (dump_file, ")\n chrec_b = "); | |
1687 | print_generic_expr (dump_file, chrec_b, 0); | |
1688 | fprintf (dump_file, ")\n"); | |
1689 | } | |
1690 | ||
1691 | if (chrec_a == NULL_TREE | |
1692 | || chrec_b == NULL_TREE | |
1693 | || chrec_contains_undetermined (chrec_a) | |
1694 | || chrec_contains_undetermined (chrec_b) | |
1695 | || chrec_contains_symbols (chrec_a) | |
1696 | || chrec_contains_symbols (chrec_b)) | |
1697 | { | |
1698 | *overlap_iterations_a = chrec_dont_know; | |
1699 | *overlap_iterations_b = chrec_dont_know; | |
1700 | } | |
1701 | ||
1702 | else if (ziv_subscript_p (chrec_a, chrec_b)) | |
1703 | analyze_ziv_subscript (chrec_a, chrec_b, | |
86df10e3 SP |
1704 | overlap_iterations_a, overlap_iterations_b, |
1705 | last_conflicts); | |
56cf8686 SP |
1706 | |
1707 | else if (siv_subscript_p (chrec_a, chrec_b)) | |
1708 | analyze_siv_subscript (chrec_a, chrec_b, | |
86df10e3 SP |
1709 | overlap_iterations_a, overlap_iterations_b, |
1710 | last_conflicts); | |
56cf8686 SP |
1711 | |
1712 | else | |
1713 | analyze_miv_subscript (chrec_a, chrec_b, | |
86df10e3 SP |
1714 | overlap_iterations_a, overlap_iterations_b, |
1715 | last_conflicts); | |
56cf8686 SP |
1716 | |
1717 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1718 | { | |
1719 | fprintf (dump_file, " (overlap_iterations_a = "); | |
1720 | print_generic_expr (dump_file, *overlap_iterations_a, 0); | |
1721 | fprintf (dump_file, ")\n (overlap_iterations_b = "); | |
1722 | print_generic_expr (dump_file, *overlap_iterations_b, 0); | |
1723 | fprintf (dump_file, ")\n"); | |
1724 | } | |
1725 | } | |
1726 | ||
1727 | \f | |
1728 | ||
1729 | /* This section contains the affine functions dependences detector. */ | |
1730 | ||
1731 | /* Computes the conflicting iterations, and initialize DDR. */ | |
1732 | ||
1733 | static void | |
1734 | subscript_dependence_tester (struct data_dependence_relation *ddr) | |
1735 | { | |
1736 | unsigned int i; | |
1737 | struct data_reference *dra = DDR_A (ddr); | |
1738 | struct data_reference *drb = DDR_B (ddr); | |
86df10e3 | 1739 | tree last_conflicts; |
56cf8686 SP |
1740 | |
1741 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1742 | fprintf (dump_file, "(subscript_dependence_tester \n"); | |
1743 | ||
1744 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) | |
1745 | { | |
1746 | tree overlaps_a, overlaps_b; | |
1747 | struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); | |
1748 | ||
1749 | analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), | |
1750 | DR_ACCESS_FN (drb, i), | |
86df10e3 SP |
1751 | &overlaps_a, &overlaps_b, |
1752 | &last_conflicts); | |
56cf8686 SP |
1753 | |
1754 | if (chrec_contains_undetermined (overlaps_a) | |
1755 | || chrec_contains_undetermined (overlaps_b)) | |
1756 | { | |
1757 | finalize_ddr_dependent (ddr, chrec_dont_know); | |
1758 | break; | |
1759 | } | |
1760 | ||
1761 | else if (overlaps_a == chrec_known | |
1762 | || overlaps_b == chrec_known) | |
1763 | { | |
1764 | finalize_ddr_dependent (ddr, chrec_known); | |
1765 | break; | |
1766 | } | |
1767 | ||
1768 | else | |
1769 | { | |
1770 | SUB_CONFLICTS_IN_A (subscript) = overlaps_a; | |
1771 | SUB_CONFLICTS_IN_B (subscript) = overlaps_b; | |
86df10e3 | 1772 | SUB_LAST_CONFLICT (subscript) = last_conflicts; |
56cf8686 SP |
1773 | } |
1774 | } | |
1775 | ||
1776 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1777 | fprintf (dump_file, ")\n"); | |
1778 | } | |
1779 | ||
1780 | /* Compute the classic per loop distance vector. | |
1781 | ||
36d59cf7 | 1782 | DDR is the data dependence relation to build a vector from. |
56cf8686 | 1783 | NB_LOOPS is the total number of loops we are considering. |
1f24dd47 | 1784 | FIRST_LOOP_DEPTH is the loop->depth of the first loop in the analyzed |
464f49d8 DB |
1785 | loop nest. |
1786 | Return FALSE if the dependence relation is outside of the loop nest | |
1f24dd47 | 1787 | starting at FIRST_LOOP_DEPTH. |
464f49d8 | 1788 | Return TRUE otherwise. */ |
56cf8686 | 1789 | |
464f49d8 | 1790 | static bool |
36d59cf7 | 1791 | build_classic_dist_vector (struct data_dependence_relation *ddr, |
1f24dd47 | 1792 | int nb_loops, int first_loop_depth) |
56cf8686 SP |
1793 | { |
1794 | unsigned i; | |
1795 | lambda_vector dist_v, init_v; | |
1796 | ||
1797 | dist_v = lambda_vector_new (nb_loops); | |
1798 | init_v = lambda_vector_new (nb_loops); | |
1799 | lambda_vector_clear (dist_v, nb_loops); | |
1800 | lambda_vector_clear (init_v, nb_loops); | |
1801 | ||
36d59cf7 | 1802 | if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) |
464f49d8 | 1803 | return true; |
56cf8686 | 1804 | |
36d59cf7 | 1805 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
56cf8686 | 1806 | { |
86df10e3 | 1807 | tree access_fn_a, access_fn_b; |
36d59cf7 | 1808 | struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); |
56cf8686 SP |
1809 | |
1810 | if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) | |
86df10e3 SP |
1811 | { |
1812 | non_affine_dependence_relation (ddr); | |
464f49d8 | 1813 | return true; |
86df10e3 SP |
1814 | } |
1815 | ||
1816 | access_fn_a = DR_ACCESS_FN (DDR_A (ddr), i); | |
1817 | access_fn_b = DR_ACCESS_FN (DDR_B (ddr), i); | |
56cf8686 | 1818 | |
86df10e3 SP |
1819 | if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC |
1820 | && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) | |
56cf8686 | 1821 | { |
1f24dd47 | 1822 | int dist, loop_nb, loop_depth; |
86df10e3 SP |
1823 | int loop_nb_a = CHREC_VARIABLE (access_fn_a); |
1824 | int loop_nb_b = CHREC_VARIABLE (access_fn_b); | |
1825 | struct loop *loop_a = current_loops->parray[loop_nb_a]; | |
1826 | struct loop *loop_b = current_loops->parray[loop_nb_b]; | |
464f49d8 | 1827 | |
8b41b1b2 DB |
1828 | /* If the loop for either variable is at a lower depth than |
1829 | the first_loop's depth, then we can't possibly have a | |
464f49d8 DB |
1830 | dependency at this level of the loop. */ |
1831 | ||
1f24dd47 DB |
1832 | if (loop_a->depth < first_loop_depth |
1833 | || loop_b->depth < first_loop_depth) | |
464f49d8 | 1834 | return false; |
86df10e3 SP |
1835 | |
1836 | if (loop_nb_a != loop_nb_b | |
1837 | && !flow_loop_nested_p (loop_a, loop_b) | |
1838 | && !flow_loop_nested_p (loop_b, loop_a)) | |
1839 | { | |
1840 | /* Example: when there are two consecutive loops, | |
1841 | ||
1842 | | loop_1 | |
1843 | | A[{0, +, 1}_1] | |
1844 | | endloop_1 | |
1845 | | loop_2 | |
1846 | | A[{0, +, 1}_2] | |
1847 | | endloop_2 | |
1848 | ||
1849 | the dependence relation cannot be captured by the | |
1850 | distance abstraction. */ | |
1851 | non_affine_dependence_relation (ddr); | |
464f49d8 | 1852 | return true; |
86df10e3 SP |
1853 | } |
1854 | ||
1855 | /* The dependence is carried by the outermost loop. Example: | |
1856 | | loop_1 | |
1857 | | A[{4, +, 1}_1] | |
1858 | | loop_2 | |
1859 | | A[{5, +, 1}_2] | |
1860 | | endloop_2 | |
1861 | | endloop_1 | |
1862 | In this case, the dependence is carried by loop_1. */ | |
1863 | loop_nb = loop_nb_a < loop_nb_b ? loop_nb_a : loop_nb_b; | |
1f24dd47 | 1864 | loop_depth = current_loops->parray[loop_nb]->depth - first_loop_depth; |
86df10e3 | 1865 | |
56cf8686 SP |
1866 | /* If the loop number is still greater than the number of |
1867 | loops we've been asked to analyze, or negative, | |
1868 | something is borked. */ | |
1f24dd47 DB |
1869 | gcc_assert (loop_depth >= 0); |
1870 | gcc_assert (loop_depth < nb_loops); | |
86df10e3 SP |
1871 | if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) |
1872 | { | |
1873 | non_affine_dependence_relation (ddr); | |
464f49d8 | 1874 | return true; |
86df10e3 SP |
1875 | } |
1876 | ||
56cf8686 SP |
1877 | dist = int_cst_value (SUB_DISTANCE (subscript)); |
1878 | ||
1879 | /* This is the subscript coupling test. | |
1880 | | loop i = 0, N, 1 | |
1881 | | T[i+1][i] = ... | |
1882 | | ... = T[i][i] | |
1883 | | endloop | |
1884 | There is no dependence. */ | |
1f24dd47 DB |
1885 | if (init_v[loop_depth] != 0 |
1886 | && dist_v[loop_depth] != dist) | |
56cf8686 | 1887 | { |
36d59cf7 | 1888 | finalize_ddr_dependent (ddr, chrec_known); |
464f49d8 | 1889 | return true; |
56cf8686 SP |
1890 | } |
1891 | ||
1f24dd47 DB |
1892 | dist_v[loop_depth] = dist; |
1893 | init_v[loop_depth] = 1; | |
56cf8686 SP |
1894 | } |
1895 | } | |
1896 | ||
1897 | /* There is a distance of 1 on all the outer loops: | |
1898 | ||
1899 | Example: there is a dependence of distance 1 on loop_1 for the array A. | |
1900 | | loop_1 | |
1901 | | A[5] = ... | |
1902 | | endloop | |
1903 | */ | |
1904 | { | |
1905 | struct loop *lca, *loop_a, *loop_b; | |
36d59cf7 DB |
1906 | struct data_reference *a = DDR_A (ddr); |
1907 | struct data_reference *b = DDR_B (ddr); | |
1f24dd47 | 1908 | int lca_depth; |
56cf8686 SP |
1909 | loop_a = loop_containing_stmt (DR_STMT (a)); |
1910 | loop_b = loop_containing_stmt (DR_STMT (b)); | |
1911 | ||
1912 | /* Get the common ancestor loop. */ | |
1913 | lca = find_common_loop (loop_a, loop_b); | |
1914 | ||
1f24dd47 DB |
1915 | lca_depth = lca->depth; |
1916 | lca_depth -= first_loop_depth; | |
1917 | gcc_assert (lca_depth >= 0); | |
1918 | gcc_assert (lca_depth < nb_loops); | |
86df10e3 | 1919 | |
56cf8686 SP |
1920 | /* For each outer loop where init_v is not set, the accesses are |
1921 | in dependence of distance 1 in the loop. */ | |
1922 | if (lca != loop_a | |
1923 | && lca != loop_b | |
1f24dd47 DB |
1924 | && init_v[lca_depth] == 0) |
1925 | dist_v[lca_depth] = 1; | |
56cf8686 SP |
1926 | |
1927 | lca = lca->outer; | |
1928 | ||
1929 | if (lca) | |
1930 | { | |
1f24dd47 | 1931 | lca_depth = lca->depth - first_loop_depth; |
56cf8686 SP |
1932 | while (lca->depth != 0) |
1933 | { | |
86df10e3 SP |
1934 | /* If we're considering just a sub-nest, then don't record |
1935 | any information on the outer loops. */ | |
1f24dd47 | 1936 | if (lca_depth < 0) |
86df10e3 SP |
1937 | break; |
1938 | ||
1f24dd47 | 1939 | gcc_assert (lca_depth < nb_loops); |
86df10e3 | 1940 | |
1f24dd47 DB |
1941 | if (init_v[lca_depth] == 0) |
1942 | dist_v[lca_depth] = 1; | |
56cf8686 | 1943 | lca = lca->outer; |
1f24dd47 | 1944 | lca_depth = lca->depth - first_loop_depth; |
56cf8686 SP |
1945 | |
1946 | } | |
1947 | } | |
1948 | } | |
1949 | ||
36d59cf7 | 1950 | DDR_DIST_VECT (ddr) = dist_v; |
86df10e3 | 1951 | DDR_SIZE_VECT (ddr) = nb_loops; |
464f49d8 | 1952 | return true; |
56cf8686 SP |
1953 | } |
1954 | ||
1955 | /* Compute the classic per loop direction vector. | |
1956 | ||
36d59cf7 | 1957 | DDR is the data dependence relation to build a vector from. |
56cf8686 | 1958 | NB_LOOPS is the total number of loops we are considering. |
1f24dd47 | 1959 | FIRST_LOOP_DEPTH is the loop->depth of the first loop in the analyzed |
464f49d8 DB |
1960 | loop nest. |
1961 | Return FALSE if the dependence relation is outside of the loop nest | |
1f24dd47 | 1962 | at FIRST_LOOP_DEPTH. |
464f49d8 | 1963 | Return TRUE otherwise. */ |
56cf8686 | 1964 | |
464f49d8 | 1965 | static bool |
36d59cf7 | 1966 | build_classic_dir_vector (struct data_dependence_relation *ddr, |
1f24dd47 | 1967 | int nb_loops, int first_loop_depth) |
56cf8686 SP |
1968 | { |
1969 | unsigned i; | |
1970 | lambda_vector dir_v, init_v; | |
1971 | ||
1972 | dir_v = lambda_vector_new (nb_loops); | |
1973 | init_v = lambda_vector_new (nb_loops); | |
1974 | lambda_vector_clear (dir_v, nb_loops); | |
1975 | lambda_vector_clear (init_v, nb_loops); | |
1976 | ||
36d59cf7 | 1977 | if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) |
464f49d8 | 1978 | return true; |
56cf8686 | 1979 | |
36d59cf7 | 1980 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
56cf8686 | 1981 | { |
86df10e3 | 1982 | tree access_fn_a, access_fn_b; |
36d59cf7 | 1983 | struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); |
56cf8686 | 1984 | |
86df10e3 | 1985 | if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) |
56cf8686 | 1986 | { |
86df10e3 | 1987 | non_affine_dependence_relation (ddr); |
464f49d8 | 1988 | return true; |
86df10e3 SP |
1989 | } |
1990 | ||
1991 | access_fn_a = DR_ACCESS_FN (DDR_A (ddr), i); | |
1992 | access_fn_b = DR_ACCESS_FN (DDR_B (ddr), i); | |
1993 | if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC | |
1994 | && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) | |
1995 | { | |
1f24dd47 | 1996 | int dist, loop_nb, loop_depth; |
56cf8686 | 1997 | enum data_dependence_direction dir = dir_star; |
86df10e3 SP |
1998 | int loop_nb_a = CHREC_VARIABLE (access_fn_a); |
1999 | int loop_nb_b = CHREC_VARIABLE (access_fn_b); | |
2000 | struct loop *loop_a = current_loops->parray[loop_nb_a]; | |
2001 | struct loop *loop_b = current_loops->parray[loop_nb_b]; | |
464f49d8 | 2002 | |
8b41b1b2 DB |
2003 | /* If the loop for either variable is at a lower depth than |
2004 | the first_loop's depth, then we can't possibly have a | |
2005 | dependency at this level of the loop. */ | |
464f49d8 | 2006 | |
1f24dd47 DB |
2007 | if (loop_a->depth < first_loop_depth |
2008 | || loop_b->depth < first_loop_depth) | |
464f49d8 | 2009 | return false; |
86df10e3 SP |
2010 | |
2011 | if (loop_nb_a != loop_nb_b | |
2012 | && !flow_loop_nested_p (loop_a, loop_b) | |
2013 | && !flow_loop_nested_p (loop_b, loop_a)) | |
2014 | { | |
2015 | /* Example: when there are two consecutive loops, | |
2016 | ||
2017 | | loop_1 | |
2018 | | A[{0, +, 1}_1] | |
2019 | | endloop_1 | |
2020 | | loop_2 | |
2021 | | A[{0, +, 1}_2] | |
2022 | | endloop_2 | |
2023 | ||
2024 | the dependence relation cannot be captured by the | |
2025 | distance abstraction. */ | |
2026 | non_affine_dependence_relation (ddr); | |
464f49d8 | 2027 | return true; |
86df10e3 SP |
2028 | } |
2029 | ||
2030 | /* The dependence is carried by the outermost loop. Example: | |
2031 | | loop_1 | |
2032 | | A[{4, +, 1}_1] | |
2033 | | loop_2 | |
2034 | | A[{5, +, 1}_2] | |
2035 | | endloop_2 | |
2036 | | endloop_1 | |
2037 | In this case, the dependence is carried by loop_1. */ | |
2038 | loop_nb = loop_nb_a < loop_nb_b ? loop_nb_a : loop_nb_b; | |
1f24dd47 | 2039 | loop_depth = current_loops->parray[loop_nb]->depth - first_loop_depth; |
56cf8686 SP |
2040 | |
2041 | /* If the loop number is still greater than the number of | |
2042 | loops we've been asked to analyze, or negative, | |
2043 | something is borked. */ | |
1f24dd47 DB |
2044 | gcc_assert (loop_depth >= 0); |
2045 | gcc_assert (loop_depth < nb_loops); | |
86df10e3 SP |
2046 | |
2047 | if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) | |
56cf8686 | 2048 | { |
86df10e3 | 2049 | non_affine_dependence_relation (ddr); |
464f49d8 | 2050 | return true; |
56cf8686 | 2051 | } |
86df10e3 SP |
2052 | |
2053 | dist = int_cst_value (SUB_DISTANCE (subscript)); | |
2054 | ||
2055 | if (dist == 0) | |
2056 | dir = dir_equal; | |
2057 | else if (dist > 0) | |
2058 | dir = dir_positive; | |
2059 | else if (dist < 0) | |
2060 | dir = dir_negative; | |
56cf8686 SP |
2061 | |
2062 | /* This is the subscript coupling test. | |
2063 | | loop i = 0, N, 1 | |
2064 | | T[i+1][i] = ... | |
2065 | | ... = T[i][i] | |
2066 | | endloop | |
2067 | There is no dependence. */ | |
1f24dd47 | 2068 | if (init_v[loop_depth] != 0 |
56cf8686 | 2069 | && dir != dir_star |
1f24dd47 DB |
2070 | && (enum data_dependence_direction) dir_v[loop_depth] != dir |
2071 | && (enum data_dependence_direction) dir_v[loop_depth] != dir_star) | |
56cf8686 | 2072 | { |
36d59cf7 | 2073 | finalize_ddr_dependent (ddr, chrec_known); |
464f49d8 | 2074 | return true; |
56cf8686 SP |
2075 | } |
2076 | ||
1f24dd47 DB |
2077 | dir_v[loop_depth] = dir; |
2078 | init_v[loop_depth] = 1; | |
56cf8686 SP |
2079 | } |
2080 | } | |
2081 | ||
2082 | /* There is a distance of 1 on all the outer loops: | |
2083 | ||
2084 | Example: there is a dependence of distance 1 on loop_1 for the array A. | |
2085 | | loop_1 | |
2086 | | A[5] = ... | |
2087 | | endloop | |
2088 | */ | |
2089 | { | |
2090 | struct loop *lca, *loop_a, *loop_b; | |
36d59cf7 DB |
2091 | struct data_reference *a = DDR_A (ddr); |
2092 | struct data_reference *b = DDR_B (ddr); | |
1f24dd47 | 2093 | int lca_depth; |
56cf8686 SP |
2094 | loop_a = loop_containing_stmt (DR_STMT (a)); |
2095 | loop_b = loop_containing_stmt (DR_STMT (b)); | |
2096 | ||
2097 | /* Get the common ancestor loop. */ | |
2098 | lca = find_common_loop (loop_a, loop_b); | |
1f24dd47 | 2099 | lca_depth = lca->depth - first_loop_depth; |
56cf8686 | 2100 | |
1f24dd47 DB |
2101 | gcc_assert (lca_depth >= 0); |
2102 | gcc_assert (lca_depth < nb_loops); | |
86df10e3 | 2103 | |
56cf8686 SP |
2104 | /* For each outer loop where init_v is not set, the accesses are |
2105 | in dependence of distance 1 in the loop. */ | |
2106 | if (lca != loop_a | |
2107 | && lca != loop_b | |
1f24dd47 DB |
2108 | && init_v[lca_depth] == 0) |
2109 | dir_v[lca_depth] = dir_positive; | |
56cf8686 SP |
2110 | |
2111 | lca = lca->outer; | |
2112 | if (lca) | |
2113 | { | |
1f24dd47 | 2114 | lca_depth = lca->depth - first_loop_depth; |
56cf8686 SP |
2115 | while (lca->depth != 0) |
2116 | { | |
86df10e3 SP |
2117 | /* If we're considering just a sub-nest, then don't record |
2118 | any information on the outer loops. */ | |
1f24dd47 | 2119 | if (lca_depth < 0) |
86df10e3 SP |
2120 | break; |
2121 | ||
1f24dd47 | 2122 | gcc_assert (lca_depth < nb_loops); |
86df10e3 | 2123 | |
1f24dd47 DB |
2124 | if (init_v[lca_depth] == 0) |
2125 | dir_v[lca_depth] = dir_positive; | |
56cf8686 | 2126 | lca = lca->outer; |
1f24dd47 | 2127 | lca_depth = lca->depth - first_loop_depth; |
56cf8686 SP |
2128 | |
2129 | } | |
2130 | } | |
2131 | } | |
2132 | ||
36d59cf7 | 2133 | DDR_DIR_VECT (ddr) = dir_v; |
86df10e3 | 2134 | DDR_SIZE_VECT (ddr) = nb_loops; |
464f49d8 | 2135 | return true; |
56cf8686 SP |
2136 | } |
2137 | ||
2138 | /* Returns true when all the access functions of A are affine or | |
2139 | constant. */ | |
2140 | ||
2141 | static bool | |
2142 | access_functions_are_affine_or_constant_p (struct data_reference *a) | |
2143 | { | |
2144 | unsigned int i; | |
2145 | varray_type fns = DR_ACCESS_FNS (a); | |
2146 | ||
2147 | for (i = 0; i < VARRAY_ACTIVE_SIZE (fns); i++) | |
2148 | if (!evolution_function_is_constant_p (VARRAY_TREE (fns, i)) | |
2149 | && !evolution_function_is_affine_multivariate_p (VARRAY_TREE (fns, i))) | |
2150 | return false; | |
2151 | ||
2152 | return true; | |
2153 | } | |
2154 | ||
2155 | /* This computes the affine dependence relation between A and B. | |
2156 | CHREC_KNOWN is used for representing the independence between two | |
2157 | accesses, while CHREC_DONT_KNOW is used for representing the unknown | |
2158 | relation. | |
2159 | ||
2160 | Note that it is possible to stop the computation of the dependence | |
2161 | relation the first time we detect a CHREC_KNOWN element for a given | |
2162 | subscript. */ | |
2163 | ||
2164 | void | |
2165 | compute_affine_dependence (struct data_dependence_relation *ddr) | |
2166 | { | |
2167 | struct data_reference *dra = DDR_A (ddr); | |
2168 | struct data_reference *drb = DDR_B (ddr); | |
2169 | ||
2170 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2171 | { | |
36d59cf7 | 2172 | fprintf (dump_file, "(compute_affine_dependence\n"); |
56cf8686 SP |
2173 | fprintf (dump_file, " (stmt_a = \n"); |
2174 | print_generic_expr (dump_file, DR_STMT (dra), 0); | |
2175 | fprintf (dump_file, ")\n (stmt_b = \n"); | |
2176 | print_generic_expr (dump_file, DR_STMT (drb), 0); | |
2177 | fprintf (dump_file, ")\n"); | |
2178 | } | |
2179 | ||
2180 | /* Analyze only when the dependence relation is not yet known. */ | |
2181 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
2182 | { | |
2183 | if (access_functions_are_affine_or_constant_p (dra) | |
2184 | && access_functions_are_affine_or_constant_p (drb)) | |
2185 | subscript_dependence_tester (ddr); | |
2186 | ||
2187 | /* As a last case, if the dependence cannot be determined, or if | |
2188 | the dependence is considered too difficult to determine, answer | |
2189 | "don't know". */ | |
2190 | else | |
2191 | finalize_ddr_dependent (ddr, chrec_dont_know); | |
2192 | } | |
2193 | ||
2194 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2195 | fprintf (dump_file, ")\n"); | |
2196 | } | |
2197 | ||
2198 | /* Compute a subset of the data dependence relation graph. Don't | |
2199 | compute read-read relations, and avoid the computation of the | |
89dbed81 | 2200 | opposite relation, i.e. when AB has been computed, don't compute BA. |
56cf8686 SP |
2201 | DATAREFS contains a list of data references, and the result is set |
2202 | in DEPENDENCE_RELATIONS. */ | |
2203 | ||
2204 | static void | |
36d59cf7 DB |
2205 | compute_all_dependences (varray_type datarefs, |
2206 | varray_type *dependence_relations) | |
56cf8686 SP |
2207 | { |
2208 | unsigned int i, j, N; | |
2209 | ||
2210 | N = VARRAY_ACTIVE_SIZE (datarefs); | |
2211 | ||
2212 | for (i = 0; i < N; i++) | |
2213 | for (j = i; j < N; j++) | |
2214 | { | |
2215 | struct data_reference *a, *b; | |
2216 | struct data_dependence_relation *ddr; | |
2217 | ||
2218 | a = VARRAY_GENERIC_PTR (datarefs, i); | |
2219 | b = VARRAY_GENERIC_PTR (datarefs, j); | |
56cf8686 SP |
2220 | ddr = initialize_data_dependence_relation (a, b); |
2221 | ||
2222 | VARRAY_PUSH_GENERIC_PTR (*dependence_relations, ddr); | |
2223 | compute_affine_dependence (ddr); | |
86df10e3 | 2224 | compute_subscript_distance (ddr); |
56cf8686 SP |
2225 | } |
2226 | } | |
2227 | ||
2228 | /* Search the data references in LOOP, and record the information into | |
2229 | DATAREFS. Returns chrec_dont_know when failing to analyze a | |
2230 | difficult case, returns NULL_TREE otherwise. | |
2231 | ||
464f49d8 DB |
2232 | TODO: This function should be made smarter so that it can handle address |
2233 | arithmetic as if they were array accesses, etc. */ | |
56cf8686 | 2234 | |
86df10e3 | 2235 | tree |
56cf8686 SP |
2236 | find_data_references_in_loop (struct loop *loop, varray_type *datarefs) |
2237 | { | |
86df10e3 | 2238 | bool dont_know_node_not_inserted = true; |
ccbdbf0a JL |
2239 | basic_block bb, *bbs; |
2240 | unsigned int i; | |
56cf8686 | 2241 | block_stmt_iterator bsi; |
86df10e3 | 2242 | |
ccbdbf0a JL |
2243 | bbs = get_loop_body (loop); |
2244 | ||
2245 | for (i = 0; i < loop->num_nodes; i++) | |
56cf8686 | 2246 | { |
ccbdbf0a JL |
2247 | bb = bbs[i]; |
2248 | ||
56cf8686 SP |
2249 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) |
2250 | { | |
2251 | tree stmt = bsi_stmt (bsi); | |
2252 | stmt_ann_t ann = stmt_ann (stmt); | |
2253 | ||
2254 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2255 | continue; | |
2256 | ||
2257 | if (!VUSE_OPS (ann) | |
2258 | && !V_MUST_DEF_OPS (ann) | |
2259 | && !V_MAY_DEF_OPS (ann)) | |
2260 | continue; | |
464f49d8 | 2261 | |
56cf8686 SP |
2262 | /* In the GIMPLE representation, a modify expression |
2263 | contains a single load or store to memory. */ | |
2264 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == ARRAY_REF) | |
2265 | VARRAY_PUSH_GENERIC_PTR | |
2266 | (*datarefs, analyze_array (stmt, TREE_OPERAND (stmt, 0), | |
2267 | false)); | |
2268 | ||
2269 | else if (TREE_CODE (TREE_OPERAND (stmt, 1)) == ARRAY_REF) | |
2270 | VARRAY_PUSH_GENERIC_PTR | |
2271 | (*datarefs, analyze_array (stmt, TREE_OPERAND (stmt, 1), | |
2272 | true)); | |
56cf8686 | 2273 | else |
86df10e3 SP |
2274 | { |
2275 | if (dont_know_node_not_inserted) | |
2276 | { | |
2277 | struct data_reference *res; | |
2278 | res = xmalloc (sizeof (struct data_reference)); | |
2279 | DR_STMT (res) = NULL_TREE; | |
2280 | DR_REF (res) = NULL_TREE; | |
2281 | DR_ACCESS_FNS (res) = NULL; | |
2282 | DR_BASE_NAME (res) = NULL; | |
2283 | DR_IS_READ (res) = false; | |
2284 | VARRAY_PUSH_GENERIC_PTR (*datarefs, res); | |
86df10e3 SP |
2285 | dont_know_node_not_inserted = false; |
2286 | } | |
2287 | } | |
2288 | ||
2289 | /* When there are no defs in the loop, the loop is parallel. */ | |
2290 | if (NUM_V_MAY_DEFS (STMT_V_MAY_DEF_OPS (stmt)) > 0 | |
2291 | || NUM_V_MUST_DEFS (STMT_V_MUST_DEF_OPS (stmt)) > 0) | |
2292 | bb->loop_father->parallel_p = false; | |
56cf8686 | 2293 | } |
86df10e3 SP |
2294 | |
2295 | if (bb->loop_father->estimated_nb_iterations == NULL_TREE) | |
2296 | compute_estimated_nb_iterations (bb->loop_father); | |
56cf8686 SP |
2297 | } |
2298 | ||
ccbdbf0a JL |
2299 | free (bbs); |
2300 | ||
86df10e3 | 2301 | return dont_know_node_not_inserted ? NULL_TREE : chrec_dont_know; |
56cf8686 SP |
2302 | } |
2303 | ||
2304 | \f | |
2305 | ||
2306 | /* This section contains all the entry points. */ | |
2307 | ||
2308 | /* Given a loop nest LOOP, the following vectors are returned: | |
2309 | *DATAREFS is initialized to all the array elements contained in this loop, | |
36d59cf7 | 2310 | *DEPENDENCE_RELATIONS contains the relations between the data references. */ |
56cf8686 SP |
2311 | |
2312 | void | |
2313 | compute_data_dependences_for_loop (unsigned nb_loops, | |
2314 | struct loop *loop, | |
2315 | varray_type *datarefs, | |
36d59cf7 | 2316 | varray_type *dependence_relations) |
56cf8686 SP |
2317 | { |
2318 | unsigned int i; | |
464f49d8 | 2319 | varray_type allrelations; |
56cf8686 SP |
2320 | |
2321 | /* If one of the data references is not computable, give up without | |
2322 | spending time to compute other dependences. */ | |
2323 | if (find_data_references_in_loop (loop, datarefs) == chrec_dont_know) | |
2324 | { | |
2325 | struct data_dependence_relation *ddr; | |
2326 | ||
2327 | /* Insert a single relation into dependence_relations: | |
2328 | chrec_dont_know. */ | |
2329 | ddr = initialize_data_dependence_relation (NULL, NULL); | |
2330 | VARRAY_PUSH_GENERIC_PTR (*dependence_relations, ddr); | |
1f24dd47 DB |
2331 | build_classic_dist_vector (ddr, nb_loops, loop->depth); |
2332 | build_classic_dir_vector (ddr, nb_loops, loop->depth); | |
56cf8686 SP |
2333 | return; |
2334 | } | |
2335 | ||
464f49d8 DB |
2336 | VARRAY_GENERIC_PTR_INIT (allrelations, 1, "Data dependence relations"); |
2337 | compute_all_dependences (*datarefs, &allrelations); | |
56cf8686 | 2338 | |
464f49d8 | 2339 | for (i = 0; i < VARRAY_ACTIVE_SIZE (allrelations); i++) |
56cf8686 SP |
2340 | { |
2341 | struct data_dependence_relation *ddr; | |
464f49d8 | 2342 | ddr = VARRAY_GENERIC_PTR (allrelations, i); |
1f24dd47 | 2343 | if (build_classic_dist_vector (ddr, nb_loops, loop->depth)) |
464f49d8 DB |
2344 | { |
2345 | VARRAY_PUSH_GENERIC_PTR (*dependence_relations, ddr); | |
1f24dd47 | 2346 | build_classic_dir_vector (ddr, nb_loops, loop->depth); |
464f49d8 | 2347 | } |
56cf8686 SP |
2348 | } |
2349 | } | |
2350 | ||
2351 | /* Entry point (for testing only). Analyze all the data references | |
2352 | and the dependence relations. | |
2353 | ||
2354 | The data references are computed first. | |
2355 | ||
2356 | A relation on these nodes is represented by a complete graph. Some | |
2357 | of the relations could be of no interest, thus the relations can be | |
2358 | computed on demand. | |
2359 | ||
2360 | In the following function we compute all the relations. This is | |
2361 | just a first implementation that is here for: | |
2362 | - for showing how to ask for the dependence relations, | |
2363 | - for the debugging the whole dependence graph, | |
2364 | - for the dejagnu testcases and maintenance. | |
2365 | ||
2366 | It is possible to ask only for a part of the graph, avoiding to | |
2367 | compute the whole dependence graph. The computed dependences are | |
2368 | stored in a knowledge base (KB) such that later queries don't | |
2369 | recompute the same information. The implementation of this KB is | |
2370 | transparent to the optimizer, and thus the KB can be changed with a | |
2371 | more efficient implementation, or the KB could be disabled. */ | |
2372 | ||
2373 | void | |
2374 | analyze_all_data_dependences (struct loops *loops) | |
2375 | { | |
2376 | unsigned int i; | |
2377 | varray_type datarefs; | |
2378 | varray_type dependence_relations; | |
56cf8686 SP |
2379 | int nb_data_refs = 10; |
2380 | ||
56cf8686 SP |
2381 | VARRAY_GENERIC_PTR_INIT (datarefs, nb_data_refs, "datarefs"); |
2382 | VARRAY_GENERIC_PTR_INIT (dependence_relations, | |
2383 | nb_data_refs * nb_data_refs, | |
2384 | "dependence_relations"); | |
2385 | ||
2386 | /* Compute DDs on the whole function. */ | |
2387 | compute_data_dependences_for_loop (loops->num, loops->parray[0], | |
36d59cf7 | 2388 | &datarefs, &dependence_relations); |
56cf8686 SP |
2389 | |
2390 | if (dump_file) | |
2391 | { | |
2392 | dump_data_dependence_relations (dump_file, dependence_relations); | |
2393 | fprintf (dump_file, "\n\n"); | |
56cf8686 | 2394 | |
86df10e3 SP |
2395 | if (dump_flags & TDF_DETAILS) |
2396 | dump_dist_dir_vectors (dump_file, dependence_relations); | |
56cf8686 | 2397 | |
86df10e3 | 2398 | if (dump_flags & TDF_STATS) |
56cf8686 | 2399 | { |
86df10e3 SP |
2400 | unsigned nb_top_relations = 0; |
2401 | unsigned nb_bot_relations = 0; | |
2402 | unsigned nb_basename_differ = 0; | |
2403 | unsigned nb_chrec_relations = 0; | |
2404 | ||
2405 | for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) | |
2406 | { | |
2407 | struct data_dependence_relation *ddr; | |
2408 | ddr = VARRAY_GENERIC_PTR (dependence_relations, i); | |
56cf8686 | 2409 | |
86df10e3 SP |
2410 | if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) |
2411 | nb_top_relations++; | |
56cf8686 | 2412 | |
86df10e3 SP |
2413 | else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) |
2414 | { | |
2415 | struct data_reference *a = DDR_A (ddr); | |
2416 | struct data_reference *b = DDR_B (ddr); | |
2417 | bool differ_p; | |
56cf8686 | 2418 | |
86df10e3 SP |
2419 | if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b) |
2420 | || (array_base_name_differ_p (a, b, &differ_p) && differ_p)) | |
2421 | nb_basename_differ++; | |
2422 | else | |
2423 | nb_bot_relations++; | |
2424 | } | |
56cf8686 | 2425 | |
86df10e3 SP |
2426 | else |
2427 | nb_chrec_relations++; | |
2428 | } | |
56cf8686 | 2429 | |
86df10e3 SP |
2430 | gather_stats_on_scev_database (); |
2431 | } | |
56cf8686 | 2432 | } |
36d59cf7 DB |
2433 | |
2434 | free_dependence_relations (dependence_relations); | |
2435 | free_data_refs (datarefs); | |
2436 | } | |
2437 | ||
2438 | /* Free the memory used by a data dependence relation DDR. */ | |
2439 | ||
2440 | void | |
2441 | free_dependence_relation (struct data_dependence_relation *ddr) | |
2442 | { | |
2443 | if (ddr == NULL) | |
2444 | return; | |
2445 | ||
2446 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_SUBSCRIPTS (ddr)) | |
2447 | varray_clear (DDR_SUBSCRIPTS (ddr)); | |
2448 | free (ddr); | |
2449 | } | |
2450 | ||
2451 | /* Free the memory used by the data dependence relations from | |
2452 | DEPENDENCE_RELATIONS. */ | |
2453 | ||
2454 | void | |
2455 | free_dependence_relations (varray_type dependence_relations) | |
2456 | { | |
2457 | unsigned int i; | |
2458 | if (dependence_relations == NULL) | |
2459 | return; | |
2460 | ||
2461 | for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) | |
2462 | free_dependence_relation (VARRAY_GENERIC_PTR (dependence_relations, i)); | |
56cf8686 | 2463 | varray_clear (dependence_relations); |
56cf8686 SP |
2464 | } |
2465 | ||
36d59cf7 DB |
2466 | /* Free the memory used by the data references from DATAREFS. */ |
2467 | ||
2468 | void | |
2469 | free_data_refs (varray_type datarefs) | |
2470 | { | |
2471 | unsigned int i; | |
2472 | ||
2473 | if (datarefs == NULL) | |
2474 | return; | |
56cf8686 | 2475 | |
36d59cf7 DB |
2476 | for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) |
2477 | { | |
2478 | struct data_reference *dr = (struct data_reference *) | |
2479 | VARRAY_GENERIC_PTR (datarefs, i); | |
01c49ce8 KH |
2480 | if (dr) |
2481 | { | |
2482 | if (DR_ACCESS_FNS (dr)) | |
2483 | varray_clear (DR_ACCESS_FNS (dr)); | |
2484 | free (dr); | |
2485 | } | |
36d59cf7 DB |
2486 | } |
2487 | varray_clear (datarefs); | |
2488 | } | |
86df10e3 | 2489 |