]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/graphite-interchange.c
Partially removing cloog.h and graphite-clast-to-gimple.h where possible. Removing...
[thirdparty/gcc.git] / gcc / graphite-interchange.c
CommitLineData
c6bb733d 1/* Interchange heuristics and transform for loop interchange on
2 polyhedral representation.
3
4 Copyright (C) 2009 Free Software Foundation, Inc.
5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6 Harsha Jagasia <harsha.jagasia@amd.com>.
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3, or (at your option)
13any later version.
14
15GCC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3. If not see
22<http://www.gnu.org/licenses/>. */
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "rtl.h"
30#include "output.h"
31#include "basic-block.h"
32#include "diagnostic.h"
33#include "tree-flow.h"
34#include "toplev.h"
35#include "tree-dump.h"
36#include "timevar.h"
37#include "cfgloop.h"
38#include "tree-chrec.h"
39#include "tree-data-ref.h"
40#include "tree-scalar-evolution.h"
41#include "tree-pass.h"
42#include "domwalk.h"
43#include "value-prof.h"
44#include "pointer-set.h"
45#include "gimple.h"
46#include "params.h"
47
48#ifdef HAVE_cloog
c6bb733d 49#include "ppl_c.h"
50#include "sese.h"
51#include "graphite-ppl.h"
52#include "graphite.h"
53#include "graphite-poly.h"
54
80020e9d 55/* Builds a linear expression, of dimension DIM, representing PDR's
56 memory access:
57
58 L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
59
60 For an array A[10][20] with two subscript locations s0 and s1, the
61 linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
5e18ab2b 62 corresponds to a memory stride of 20.
63
64 OFFSET is a number of dimensions to prepend before the
65 subscript dimensions: s_0, s_1, ..., s_n.
66
67 Thus, the final linear expression has the following format:
68 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
69 where the expression itself is:
70 c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */
80020e9d 71
72static ppl_Linear_Expression_t
5e18ab2b 73build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr)
80020e9d 74{
75 ppl_Linear_Expression_t res;
76 ppl_Linear_Expression_t le;
77 ppl_dimension_type i;
78 ppl_dimension_type first = pdr_subscript_dim (pdr, 0);
79 ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr));
0ef84e3b 80 mpz_t size, sub_size;
5e18ab2b 81 graphite_dim_t dim = offset + pdr_dim (pdr);
80020e9d 82
83 ppl_new_Linear_Expression_with_dimension (&res, dim);
84
2d6fe479 85 mpz_init (size);
86 mpz_set_si (size, 1);
87 mpz_init (sub_size);
88 mpz_set_si (sub_size, 1);
80020e9d 89
90 for (i = last - 1; i >= first; i--)
91 {
5e18ab2b 92 ppl_set_coef_gmp (res, i + offset, size);
80020e9d 93
5e18ab2b 94 ppl_new_Linear_Expression_with_dimension (&le, dim - offset);
80020e9d 95 ppl_set_coef (le, i, 1);
55c54afb 96 ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size);
2d6fe479 97 mpz_mul (size, size, sub_size);
80020e9d 98 ppl_delete_Linear_Expression (le);
99 }
100
2d6fe479 101 mpz_clear (sub_size);
102 mpz_clear (size);
80020e9d 103 return res;
104}
105
3bcc3204 106/* Builds a partial difference equations and inserts them
107 into pointset powerset polyhedron P. Polyhedron is assumed
108 to have the format: T|I|T'|I'|G|S|S'|l1|l2.
109
110 TIME_DEPTH is the time dimension w.r.t. which we are
111 differentiating.
112 OFFSET represents the number of dimensions between
113 columns t_{time_depth} and t'_{time_depth}.
114 DIM_SCTR is the number of scattering dimensions. It is
115 essentially the dimensionality of the T vector.
116
117 The following equations are inserted into the polyhedron P:
118 | t_1 = t_1'
119 | ...
120 | t_{time_depth-1} = t'_{time_depth-1}
121 | t_{time_depth} = t'_{time_depth} + 1
122 | t_{time_depth+1} = t'_{time_depth + 1}
123 | ...
124 | t_{dim_sctr} = t'_{dim_sctr}. */
125
126static void
127build_partial_difference (ppl_Pointset_Powerset_C_Polyhedron_t *p,
128 ppl_dimension_type time_depth,
129 ppl_dimension_type offset,
130 ppl_dimension_type dim_sctr)
131{
132 ppl_Constraint_t new_cstr;
133 ppl_Linear_Expression_t le;
134 ppl_dimension_type i;
135 ppl_dimension_type dim;
136 ppl_Pointset_Powerset_C_Polyhedron_t temp;
137
138 /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
139 This is the core part of this alogrithm, since this
140 constraint asks for the memory access stride (difference)
141 between two consecutive points in time dimensions. */
142
143 ppl_Pointset_Powerset_C_Polyhedron_space_dimension (*p, &dim);
144 ppl_new_Linear_Expression_with_dimension (&le, dim);
145 ppl_set_coef (le, time_depth, 1);
146 ppl_set_coef (le, time_depth + offset, -1);
147 ppl_set_inhomogeneous (le, 1);
148 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
149 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
150 ppl_delete_Linear_Expression (le);
151 ppl_delete_Constraint (new_cstr);
152
153 /* Add equalities:
154 | t1 = t1'
155 | ...
156 | t_{time_depth-1} = t'_{time_depth-1}
157 | t_{time_depth+1} = t'_{time_depth+1}
158 | ...
159 | t_{dim_sctr} = t'_{dim_sctr}
160
161 This means that all the time dimensions are equal except for
162 time_depth, where the constraint is t_{depth} = t'_{depth} + 1
163 step. More to this: we should be carefull not to add equalities
164 to the 'coupled' dimensions, which happens when the one dimension
165 is stripmined dimension, and the other dimension corresponds
166 to the point loop inside stripmined dimension. */
167
168 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
169
170 for (i = 0; i < dim_sctr; i++)
171 if (i != time_depth)
172 {
173 ppl_new_Linear_Expression_with_dimension (&le, dim);
174 ppl_set_coef (le, i, 1);
175 ppl_set_coef (le, i + offset, -1);
176 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
177 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (temp, new_cstr);
178
179 if (ppl_Pointset_Powerset_C_Polyhedron_is_empty (temp))
180 {
181 ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
182 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
183 }
184 else
185 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
186 ppl_delete_Linear_Expression (le);
187 ppl_delete_Constraint (new_cstr);
188 }
189
190 ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
191}
192
193
80020e9d 194/* Set STRIDE to the stride of PDR in memory by advancing by one in
acba5819 195 the loop at DEPTH. */
80020e9d 196
197static void
0ef84e3b 198pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
80020e9d 199{
a1190c1b 200 ppl_dimension_type time_depth;
80020e9d 201 ppl_Linear_Expression_t le, lma;
202 ppl_Constraint_t new_cstr;
80020e9d 203 ppl_dimension_type i, *map;
5e18ab2b 204 ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr;
205 graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1;
206 poly_bb_p pbb = PDR_PBB (pdr);
207 ppl_dimension_type offset = pbb_nb_scattering_transform (pbb)
208 + pbb_nb_local_vars (pbb)
209 + pbb_dim_iter_domain (pbb);
210 ppl_dimension_type offsetg = offset + pbb_nb_params (pbb);
211 ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb)
212 + pbb_nb_local_vars (pbb);
213 ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts;
214 ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1;
215 ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2;
216
217 /* The resulting polyhedron should have the following format:
218 T|I|T'|I'|G|S|S'|l1|l2
219 where:
220 | T = t_1..t_{dim_sctr}
221 | I = i_1..i_{dim_iter_domain}
222 | T'= t'_1..t'_{dim_sctr}
223 | I'= i'_1..i'_{dim_iter_domain}
224 | G = g_1..g_{nb_params}
225 | S = s_1..s_{nb_subscripts}
226 | S'= s'_1..s'_{nb_subscripts}
227 | l1 and l2 are scalars.
228
229 Some invariants:
230 offset = dim_sctr + dim_iter_domain + nb_local_vars
231 offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params. */
232
233 /* Construct the T|I|0|0|G|0|0|0|0 part. */
234 {
235 ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
236 (&sctr, PBB_TRANSFORMED_SCATTERING (pbb));
237 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
238 (sctr, 2 * nb_subscripts + 2);
239 ppl_insert_dimensions_pointset (sctr, offset, offset);
240 }
241
242 /* Construct the 0|I|0|0|G|S|0|0|0 part. */
243 {
244 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
245 (&p1, PDR_ACCESSES (pdr));
246 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
247 (p1, nb_subscripts + 2);
248 ppl_insert_dimensions_pointset (p1, 0, dim_sctr);
249 ppl_insert_dimensions_pointset (p1, offset, offset);
250 }
251
252 /* Construct the 0|0|0|0|0|S|0|l1|0 part. */
253 {
254 lma = build_linearized_memory_access (offset + dim_sctr, pdr);
255 ppl_set_coef (lma, dim_L1, -1);
256 ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL);
257 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr);
a66257d5 258 ppl_delete_Linear_Expression (lma);
259 ppl_delete_Constraint (new_cstr);
5e18ab2b 260 }
261
dd3d3fae 262 /* Now intersect all the parts to get the polyhedron P1:
5e18ab2b 263 T|I|0|0|G|0|0|0 |0
264 0|I|0|0|G|S|0|0 |0
265 0|0|0|0|0|S|0|l1|0
266 ------------------
267 T|I|0|0|G|S|0|l1|0. */
268
269 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr);
270 ppl_delete_Pointset_Powerset_C_Polyhedron (sctr);
271
272 /* Build P2, which would have the following form:
273 0|0|T'|I'|G|0|S'|0|l2
274
275 P2 is built, by remapping the P1 polyhedron:
276 T|I|0|0|G|S|0|l1|0
277
278 using the following mapping:
279 T->T'
280 I->I'
281 S->S'
282 l1->l2. */
283 {
284 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
285 (&p2, p1);
286
287 map = ppl_new_id_map (new_dim);
288
dd3d3fae 289 /* TI -> T'I'. */
5e18ab2b 290 for (i = 0; i < offset; i++)
291 ppl_interchange (map, i, i + offset);
292
dd3d3fae 293 /* l1 -> l2. */
5e18ab2b 294 ppl_interchange (map, dim_L1, dim_L2);
295
dd3d3fae 296 /* S -> S'. */
5e18ab2b 297 for (i = 0; i < nb_subscripts; i++)
298 ppl_interchange (map, offset + offsetg + i,
299 offset + offsetg + nb_subscripts + i);
300
301 ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim);
302 free (map);
303 }
304
a1190c1b 305 time_depth = psct_dynamic_dim (pbb, depth);
80020e9d 306
307 /* P1 = P1 inter P2. */
a66257d5 308 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2);
3bcc3204 309 build_partial_difference (&p1, time_depth, offset, dim_sctr);
80020e9d 310
311 /* Maximise the expression L2 - L1. */
5e18ab2b 312 {
313 ppl_new_Linear_Expression_with_dimension (&le, new_dim);
314 ppl_set_coef (le, dim_L2, 1);
315 ppl_set_coef (le, dim_L1, -1);
316 ppl_max_for_le_pointset (p1, le, stride);
5e18ab2b 317 }
a66257d5 318
3bcc3204 319 if (dump_file && (dump_flags & TDF_DETAILS))
320 {
2d6fe479 321 char *str;
322 void (*gmp_free) (void *, size_t);
323
3bcc3204 324 fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
325 pbb_index (pbb), PDR_ID (pdr), (int) depth);
2d6fe479 326 str = mpz_get_str (0, 10, stride);
327 fprintf (dump_file, " %s ", str);
328 mp_get_memory_functions (NULL, NULL, &gmp_free);
329 (*gmp_free) (str, strlen (str) + 1);
3bcc3204 330 }
331
a66257d5 332 ppl_delete_Pointset_Powerset_C_Polyhedron (p1);
333 ppl_delete_Pointset_Powerset_C_Polyhedron (p2);
334 ppl_delete_Linear_Expression (le);
c6bb733d 335}
336
b33d4eb4 337
a16e8346 338/* Sets STRIDES to the sum of all the strides of the data references
339 accessed in LOOP at DEPTH. */
acba5819 340
341static void
0ef84e3b 342memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
acba5819 343{
a16e8346 344 int i, j;
345 lst_p l;
acba5819 346 poly_dr_p pdr;
0ef84e3b 347 mpz_t s, n;
acba5819 348
2d6fe479 349 mpz_init (s);
350 mpz_init (n);
acba5819 351
a16e8346 352 for (j = 0; VEC_iterate (lst_p, LST_SEQ (loop), j, l); j++)
353 if (LST_LOOP_P (l))
b33d4eb4 354 memory_strides_in_loop_1 (l, depth, strides);
a16e8346 355 else
356 for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (LST_PBB (l)), i, pdr); i++)
357 {
b33d4eb4 358 pdr_stride_in_loop (s, depth, pdr);
2d6fe479 359 mpz_set_si (n, PDR_NB_REFS (pdr));
360 mpz_mul (s, s, n);
361 mpz_add (strides, strides, s);
a16e8346 362 }
acba5819 363
2d6fe479 364 mpz_clear (s);
365 mpz_clear (n);
acba5819 366}
367
b33d4eb4 368/* Sets STRIDES to the sum of all the strides of the data references
369 accessed in LOOP at DEPTH. */
370
371static void
0ef84e3b 372memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
b33d4eb4 373{
2d6fe479 374 if (mpz_cmp_si (loop->memory_strides, -1) == 0)
b33d4eb4 375 {
2d6fe479 376 mpz_set_si (strides, 0);
b33d4eb4 377 memory_strides_in_loop_1 (loop, depth, strides);
378 }
379 else
2d6fe479 380 mpz_set (strides, loop->memory_strides);
b33d4eb4 381}
382
a16e8346 383/* Return true when the interchange of loops LOOP1 and LOOP2 is
384 profitable.
80020e9d 385
386 Example:
387
388 | int a[100][100];
389 |
390 | int
391 | foo (int N)
392 | {
393 | int j;
394 | int i;
395 |
396 | for (i = 0; i < N; i++)
397 | for (j = 0; j < N; j++)
398 | a[j][2 * i] += 1;
399 |
400 | return a[N][12];
401 | }
402
403 The data access A[j][i] is described like this:
404
405 | i j N a s0 s1 1
406 | 0 0 0 1 0 0 -5 = 0
407 | 0 -1 0 0 1 0 0 = 0
408 |-2 0 0 0 0 1 0 = 0
409 | 0 0 0 0 1 0 0 >= 0
410 | 0 0 0 0 0 1 0 >= 0
411 | 0 0 0 0 -1 0 100 >= 0
412 | 0 0 0 0 0 -1 100 >= 0
413
414 The linearized memory access L to A[100][100] is:
415
416 | i j N a s0 s1 1
417 | 0 0 0 0 100 1 0
418
5e18ab2b 419 TODO: the shown format is not valid as it does not show the fact
420 that the iteration domain "i j" is transformed using the scattering.
421
80020e9d 422 Next, to measure the impact of iterating once in loop "i", we build
423 a maximization problem: first, we add to DR accesses the dimensions
3d695986 424 k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
425 L1 and L2 are the linearized memory access functions.
80020e9d 426
427 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
428 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
429 | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j
430 |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i
431 | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0
432 | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0
433 | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0
434 | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0
435 | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1
436
437 Then, we generate the polyhedron P2 by interchanging the dimensions
5e18ab2b 438 (s0, s2), (s1, s3), (L1, L2), (k, i)
80020e9d 439
440 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
441 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
442 | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j
443 | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k
444 | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0
445 | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0
446 | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0
447 | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0
448 | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3
449
450 then we add to P2 the equality k = i + 1:
451
452 |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1
453
454 and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
455
5e18ab2b 456 Similarly, to determine the impact of one iteration on loop "j", we
80020e9d 457 interchange (k, j), we add "k = j + 1", and we compute D2 the
458 maximal value of the difference.
459
460 Finally, the profitability test is D1 < D2: if in the outer loop
461 the strides are smaller than in the inner loop, then it is
462 profitable to interchange the loops at DEPTH1 and DEPTH2. */
c6bb733d 463
464static bool
a16e8346 465lst_interchange_profitable_p (lst_p loop1, lst_p loop2)
c6bb733d 466{
0ef84e3b 467 mpz_t d1, d2;
c6bb733d 468 bool res;
469
a16e8346 470 gcc_assert (loop1 && loop2
471 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2)
472 && lst_depth (loop1) < lst_depth (loop2));
c6bb733d 473
2d6fe479 474 mpz_init (d1);
475 mpz_init (d2);
c6bb733d 476
a16e8346 477 memory_strides_in_loop (loop1, lst_depth (loop1), d1);
478 memory_strides_in_loop (loop2, lst_depth (loop2), d2);
c6bb733d 479
6955599b 480 res = mpz_cmp (d1, d2) < 0;
c6bb733d 481
2d6fe479 482 mpz_clear (d1);
483 mpz_clear (d2);
c6bb733d 484
485 return res;
486}
487
488/* Interchanges the loops at DEPTH1 and DEPTH2 of the original
489 scattering and assigns the resulting polyhedron to the transformed
490 scattering. */
491
492static void
5e18ab2b 493pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
494 poly_bb_p pbb)
c6bb733d 495{
496 ppl_dimension_type i, dim;
497 ppl_dimension_type *map;
498 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
5e18ab2b 499 ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1);
500 ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2);
c6bb733d 501
502 ppl_Polyhedron_space_dimension (poly, &dim);
503 map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim);
504
505 for (i = 0; i < dim; i++)
506 map[i] = i;
507
508 map[dim1] = dim2;
509 map[dim2] = dim1;
510
511 ppl_Polyhedron_map_space_dimensions (poly, map, dim);
512 free (map);
513}
514
4d0382bc 515/* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
516 the statements below LST. */
517
518static void
519lst_apply_interchange (lst_p lst, int depth1, int depth2)
520{
521 if (!lst)
522 return;
523
524 if (LST_LOOP_P (lst))
525 {
526 int i;
527 lst_p l;
528
529 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
530 lst_apply_interchange (l, depth1, depth2);
531 }
532 else
533 pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
534}
535
06ced013 536/* Return true when the nest starting at LOOP1 and ending on LOOP2 is
537 perfect: i.e. there are no sequence of statements. */
538
539static bool
540lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
541{
542 if (loop1 == loop2)
543 return true;
544
545 if (!LST_LOOP_P (loop1))
546 return false;
547
548 return VEC_length (lst_p, LST_SEQ (loop1)) == 1
549 && lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2);
550}
551
552/* Transform the loop nest between LOOP1 and LOOP2 into a perfect
553 nest. To continue the naming tradition, this function is called
87d25ca7 554 after perfect_nestify. NEST is set to the perfectly nested loop
555 that is created. BEFORE/AFTER are set to the loops distributed
556 before/after the loop NEST. */
06ced013 557
558static void
87d25ca7 559lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
560 lst_p *nest, lst_p *after)
06ced013 561{
06ced013 562 poly_bb_p first, last;
563
564 gcc_assert (loop1 && loop2
565 && loop1 != loop2
566 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
567
568 first = LST_PBB (lst_find_first_pbb (loop2));
569 last = LST_PBB (lst_find_last_pbb (loop2));
570
87d25ca7 571 *before = copy_lst (loop1);
572 *nest = copy_lst (loop1);
573 *after = copy_lst (loop1);
06ced013 574
87d25ca7 575 lst_remove_all_before_including_pbb (*before, first, false);
576 lst_remove_all_before_including_pbb (*after, last, true);
06ced013 577
87d25ca7 578 lst_remove_all_before_excluding_pbb (*nest, first, true);
579 lst_remove_all_before_excluding_pbb (*nest, last, false);
673c512e 580
581 if (lst_empty_p (*before))
cfd3faac 582 {
583 free_lst (*before);
584 *before = NULL;
585 }
673c512e 586 if (lst_empty_p (*after))
cfd3faac 587 {
588 free_lst (*after);
589 *after = NULL;
590 }
673c512e 591 if (lst_empty_p (*nest))
cfd3faac 592 {
593 free_lst (*nest);
594 *nest = NULL;
595 }
06ced013 596}
4d0382bc 597
598/* Try to interchange LOOP1 with LOOP2 for all the statements of the
599 body of LOOP2. LOOP1 contains LOOP2. Return true if it did the
a1a6700b 600 interchange. */
4d0382bc 601
602static bool
a1a6700b 603lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
4d0382bc 604{
605 int depth1 = lst_depth (loop1);
606 int depth2 = lst_depth (loop2);
87d25ca7 607 lst_p transformed;
608
a1a6700b 609 lst_p before = NULL, nest = NULL, after = NULL;
4d0382bc 610
a16e8346 611 if (!lst_interchange_profitable_p (loop1, loop2))
4d0382bc 612 return false;
613
06ced013 614 if (!lst_perfectly_nested_p (loop1, loop2))
a1a6700b 615 lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
06ced013 616
4d0382bc 617 lst_apply_interchange (loop2, depth1, depth2);
618
87d25ca7 619 /* Sync the transformed LST information and the PBB scatterings
620 before using the scatterings in the data dependence analysis. */
a1a6700b 621 if (before || nest || after)
87d25ca7 622 {
623 transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
a1a6700b 624 before, nest, after);
87d25ca7 625 lst_update_scattering (transformed);
626 free_lst (transformed);
627 }
628
4d0382bc 629 if (graphite_legal_transform (scop))
630 {
631 if (dump_file && (dump_flags & TDF_DETAILS))
632 fprintf (dump_file,
633 "Loops at depths %d and %d will be interchanged.\n",
634 depth1, depth2);
635
87d25ca7 636 /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */
a1a6700b 637 lst_insert_in_sequence (before, loop1, true);
638 lst_insert_in_sequence (after, loop1, false);
87d25ca7 639
a1a6700b 640 if (nest)
87d25ca7 641 {
a1a6700b 642 lst_replace (loop1, nest);
87d25ca7 643 free_lst (loop1);
644 }
645
4d0382bc 646 return true;
647 }
648
649 /* Undo the transform. */
cfd3faac 650 free_lst (before);
651 free_lst (nest);
652 free_lst (after);
4d0382bc 653 lst_apply_interchange (loop2, depth2, depth1);
654 return false;
655}
656
673c512e 657/* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
658 with the loop OUTER in LST_SEQ (OUTER_FATHER). */
4d0382bc 659
660static bool
673c512e 661lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
662 lst_p inner_father)
4d0382bc 663{
673c512e 664 int inner;
a1a6700b 665 lst_p loop1, loop2;
87d25ca7 666
673c512e 667 gcc_assert (outer_father
668 && LST_LOOP_P (outer_father)
669 && LST_LOOP_P (VEC_index (lst_p, LST_SEQ (outer_father), outer))
670 && inner_father
671 && LST_LOOP_P (inner_father));
4d0382bc 672
a1a6700b 673 loop1 = VEC_index (lst_p, LST_SEQ (outer_father), outer);
87d25ca7 674
a1a6700b 675 for (inner = 0; VEC_iterate (lst_p, LST_SEQ (inner_father), inner, loop2); inner++)
676 if (LST_LOOP_P (loop2)
677 && (lst_try_interchange_loops (scop, loop1, loop2)
678 || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
679 return true;
680
681 return false;
87d25ca7 682}
4d0382bc 683
87d25ca7 684/* Interchanges all the loops of LOOP and the loops of its body that
685 are considered profitable to interchange. Return true if it did
673c512e 686 interchanged some loops. OUTER is the index in LST_SEQ (LOOP) that
687 points to the next outer loop to be considered for interchange. */
4d0382bc 688
87d25ca7 689static bool
673c512e 690lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
87d25ca7 691{
692 lst_p l;
693 bool res = false;
673c512e 694 int i = 0;
695 lst_p father;
4d0382bc 696
87d25ca7 697 if (!loop || !LST_LOOP_P (loop))
698 return false;
4d0382bc 699
673c512e 700 father = LST_LOOP_FATHER (loop);
701 if (father)
702 {
a1a6700b 703 while (lst_interchange_select_inner (scop, father, outer, loop))
704 {
705 res = true;
706 loop = VEC_index (lst_p, LST_SEQ (father), outer);
707 }
673c512e 708 }
709
710 if (LST_LOOP_P (loop))
711 for (i = 0; VEC_iterate (lst_p, LST_SEQ (loop), i, l); i++)
712 if (LST_LOOP_P (l))
713 res |= lst_interchange_select_outer (scop, l, i);
87d25ca7 714
87d25ca7 715 return res;
c6bb733d 716}
717
718/* Interchanges all the loop depths that are considered profitable for SCOP. */
719
720bool
721scop_do_interchange (scop_p scop)
722{
673c512e 723 bool res = lst_interchange_select_outer
724 (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
87d25ca7 725
726 lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
a741358d 727
06ced013 728 return res;
c6bb733d 729}
730
4d0382bc 731
c6bb733d 732#endif
733