]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/graphite-interchange.c
genattrtab.c (write_header): Include hash-set.h...
[thirdparty/gcc.git] / gcc / graphite-interchange.c
CommitLineData
2abae5f1
SP
1/* Interchange heuristics and transform for loop interchange on
2 polyhedral representation.
3
5624e564 4 Copyright (C) 2009-2015 Free Software Foundation, Inc.
2abae5f1
SP
5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6 Harsha Jagasia <harsha.jagasia@amd.com>.
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3, or (at your option)
13any later version.
14
15GCC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3. If not see
22<http://www.gnu.org/licenses/>. */
33ad93b9 23
2abae5f1 24#include "config.h"
33ad93b9 25
eae1a5d4 26#ifdef HAVE_isl
33ad93b9
RG
27#include <isl/aff.h>
28#include <isl/set.h>
29#include <isl/map.h>
30#include <isl/union_map.h>
31#include <isl/ilp.h>
b47595f7 32#include <isl/val.h>
797d8858
TB
33
34/* Since ISL-0.13, the extern is in val_gmp.h. */
35#if !defined(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE) && defined(__cplusplus)
b47595f7
MN
36extern "C" {
37#endif
38#include <isl/val_gmp.h>
797d8858 39#if !defined(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE) && defined(__cplusplus)
b47595f7
MN
40}
41#endif
eae1a5d4 42#endif
33ad93b9 43
2abae5f1
SP
44#include "system.h"
45#include "coretypes.h"
60393bbc
AM
46#include "hash-set.h"
47#include "machmode.h"
40e23961
MC
48#include "vec.h"
49#include "double-int.h"
50#include "input.h"
51#include "alias.h"
52#include "symtab.h"
53#include "options.h"
54#include "wide-int.h"
55#include "inchash.h"
56#include "tree.h"
57#include "fold-const.h"
58#include "predict.h"
60393bbc
AM
59#include "tm.h"
60#include "hard-reg-set.h"
61#include "input.h"
62#include "function.h"
63#include "dominance.h"
64#include "cfg.h"
2fb9a547
AM
65#include "basic-block.h"
66#include "tree-ssa-alias.h"
67#include "internal-fn.h"
68#include "gimple-expr.h"
69#include "is-a.h"
442b4905 70#include "gimple.h"
5be5c238 71#include "gimple-iterator.h"
442b4905 72#include "tree-ssa-loop.h"
7ee2468b 73#include "dumpfile.h"
2abae5f1
SP
74#include "cfgloop.h"
75#include "tree-chrec.h"
76#include "tree-data-ref.h"
77#include "tree-scalar-evolution.h"
1bd6497c 78#include "sese.h"
2abae5f1 79
eae1a5d4 80#ifdef HAVE_isl
2abae5f1
SP
81#include "graphite-poly.h"
82
33ad93b9 83/* XXX isl rewrite following comment */
fb9fb290
SP
84/* Builds a linear expression, of dimension DIM, representing PDR's
85 memory access:
86
87 L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
88
89 For an array A[10][20] with two subscript locations s0 and s1, the
90 linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
b0c7a278
KT
91 corresponds to a memory stride of 20.
92
93 OFFSET is a number of dimensions to prepend before the
94 subscript dimensions: s_0, s_1, ..., s_n.
95
96 Thus, the final linear expression has the following format:
97 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
98 where the expression itself is:
99 c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */
fb9fb290 100
33ad93b9
RG
101static isl_constraint *
102build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
fb9fb290 103{
33ad93b9
RG
104 isl_constraint *res;
105 isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
106 unsigned offset, nsubs;
107 int i;
b47595f7
MN
108 isl_ctx *ctx;
109
110 isl_val *size, *subsize, *size1;
33ad93b9
RG
111
112 res = isl_equality_alloc (ls);
b47595f7
MN
113 ctx = isl_local_space_get_ctx (ls);
114 size = isl_val_int_from_ui (ctx, 1);
33ad93b9
RG
115
116 nsubs = isl_set_dim (pdr->extent, isl_dim_set);
117 /* -1 for the already included L dimension. */
118 offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
119 res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
120 /* Go through all subscripts from last to first. First dimension
121 is the alias set, ignore it. */
122 for (i = nsubs - 1; i >= 1; i--)
fb9fb290 123 {
33ad93b9
RG
124 isl_space *dc;
125 isl_aff *aff;
126
b47595f7
MN
127 size1 = isl_val_copy (size);
128 res = isl_constraint_set_coefficient_val (res, isl_dim_out, offset + i, size);
33ad93b9
RG
129 dc = isl_set_get_space (pdr->extent);
130 aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
131 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
b47595f7 132 subsize = isl_set_max_val (pdr->extent, aff);
33ad93b9 133 isl_aff_free (aff);
b47595f7 134 size = isl_val_mul (size1, subsize);
fb9fb290
SP
135 }
136
b47595f7 137 isl_val_free (size);
33ad93b9 138
fb9fb290
SP
139 return res;
140}
141
33ad93b9
RG
142/* Set STRIDE to the stride of PDR in memory by advancing by one in
143 the loop at DEPTH. */
2bc529bf
KT
144
145static void
33ad93b9 146pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
2bc529bf 147{
33ad93b9
RG
148 poly_bb_p pbb = PDR_PBB (pdr);
149 isl_map *map;
150 isl_set *set;
151 isl_aff *aff;
152 isl_space *dc;
153 isl_constraint *lma, *c;
b47595f7 154 isl_val *islstride;
33ad93b9
RG
155 graphite_dim_t time_depth;
156 unsigned offset, nt;
157 unsigned i;
158 /* XXX isl rewrite following comments. */
159 /* Builds a partial difference equations and inserts them
160 into pointset powerset polyhedron P. Polyhedron is assumed
161 to have the format: T|I|T'|I'|G|S|S'|l1|l2.
162
163 TIME_DEPTH is the time dimension w.r.t. which we are
164 differentiating.
165 OFFSET represents the number of dimensions between
166 columns t_{time_depth} and t'_{time_depth}.
167 DIM_SCTR is the number of scattering dimensions. It is
168 essentially the dimensionality of the T vector.
169
170 The following equations are inserted into the polyhedron P:
171 | t_1 = t_1'
172 | ...
173 | t_{time_depth-1} = t'_{time_depth-1}
174 | t_{time_depth} = t'_{time_depth} + 1
175 | t_{time_depth+1} = t'_{time_depth + 1}
176 | ...
177 | t_{dim_sctr} = t'_{dim_sctr}. */
2bc529bf
KT
178
179 /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
180 This is the core part of this alogrithm, since this
181 constraint asks for the memory access stride (difference)
182 between two consecutive points in time dimensions. */
183
2bc529bf
KT
184 /* Add equalities:
185 | t1 = t1'
186 | ...
187 | t_{time_depth-1} = t'_{time_depth-1}
188 | t_{time_depth+1} = t'_{time_depth+1}
189 | ...
190 | t_{dim_sctr} = t'_{dim_sctr}
191
192 This means that all the time dimensions are equal except for
193 time_depth, where the constraint is t_{depth} = t'_{depth} + 1
073a8998 194 step. More to this: we should be careful not to add equalities
2bc529bf
KT
195 to the 'coupled' dimensions, which happens when the one dimension
196 is stripmined dimension, and the other dimension corresponds
197 to the point loop inside stripmined dimension. */
198
33ad93b9
RG
199 /* pdr->accesses: [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
200 ??? [P] not used for PDRs?
201 pdr->extent: [a,S1..nb_subscript]
202 pbb->domain: [P1..nb_param,I1..nb_domain]
203 pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
204 [T] includes local vars (currently unused)
205
206 First we create [P,I] -> [T,a,S]. */
207
208 map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
209 isl_map_copy (pdr->accesses));
210 /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
211 map = isl_map_add_dims (map, isl_dim_out, 1);
212 /* Build a constraint for "lma[S] - L == 0", effectively calculating
213 L in terms of subscripts. */
214 lma = build_linearized_memory_access (map, pdr);
215 /* And add it to the map, so we now have:
216 [P,I] -> [T,a,S,L] : lma([S]) == L. */
217 map = isl_map_add_constraint (map, lma);
218
219 /* Then we create [P,I,P',I'] -> [T,a,S,L,T',a',S',L']. */
220 map = isl_map_flat_product (map, isl_map_copy (map));
221
222 /* Now add the equality T[time_depth] == T'[time_depth]+1. This will
223 force L' to be the linear address at T[time_depth] + 1. */
224 time_depth = psct_dynamic_dim (pbb, depth);
225 /* Length of [a,S] plus [L] ... */
226 offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
227 /* ... plus [T]. */
228 offset += isl_map_dim (pbb->transformed, isl_dim_out);
229
230 c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
231 c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
232 c = isl_constraint_set_coefficient_si (c, isl_dim_out,
233 offset + time_depth, -1);
234 c = isl_constraint_set_constant_si (c, 1);
235 map = isl_map_add_constraint (map, c);
236
237 /* Now we equate most of the T/T' elements (making PITaSL nearly
238 the same is (PITaSL)', except for one dimension, namely for 'depth'
239 (an index into [I]), after translating to index into [T]. Take care
240 to not produce an empty map, which indicates we wanted to equate
241 two dimensions that are already coupled via the above time_depth
242 dimension. Happens with strip mining where several scatter dimension
243 are interdependend. */
244 /* Length of [T]. */
245 nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
246 for (i = 0; i < nt; i++)
2bc529bf
KT
247 if (i != time_depth)
248 {
33ad93b9
RG
249 isl_map *temp = isl_map_equate (isl_map_copy (map),
250 isl_dim_out, i,
251 isl_dim_out, offset + i);
252 if (isl_map_is_empty (temp))
253 isl_map_free (temp);
254 else
255 {
256 isl_map_free (map);
257 map = temp;
258 }
2bc529bf
KT
259 }
260
33ad93b9
RG
261 /* Now maximize the expression L' - L. */
262 set = isl_map_range (map);
263 dc = isl_set_get_space (set);
264 aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
265 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
266 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
b47595f7
MN
267 islstride = isl_set_max_val (set, aff);
268 isl_val_get_num_gmp (islstride, stride);
269 isl_val_free (islstride);
33ad93b9
RG
270 isl_aff_free (aff);
271 isl_set_free (set);
67255edf 272
2bc529bf
KT
273 if (dump_file && (dump_flags & TDF_DETAILS))
274 {
8ac16127
MG
275 gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d: %Zd ",
276 pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
2bc529bf 277 }
2abae5f1
SP
278}
279
bf69e754
SP
280/* Sets STRIDES to the sum of all the strides of the data references
281 accessed in LOOP at DEPTH. */
aec12420
SP
282
283static void
e262fdda 284memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
aec12420 285{
bf69e754
SP
286 int i, j;
287 lst_p l;
aec12420 288 poly_dr_p pdr;
e262fdda 289 mpz_t s, n;
aec12420 290
a0bb35c7
AS
291 mpz_init (s);
292 mpz_init (n);
aec12420 293
9771b263 294 FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
bf69e754 295 if (LST_LOOP_P (l))
eaffa762 296 memory_strides_in_loop_1 (l, depth, strides);
bf69e754 297 else
9771b263 298 FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
bf69e754 299 {
eaffa762 300 pdr_stride_in_loop (s, depth, pdr);
a0bb35c7
AS
301 mpz_set_si (n, PDR_NB_REFS (pdr));
302 mpz_mul (s, s, n);
303 mpz_add (strides, strides, s);
bf69e754 304 }
aec12420 305
a0bb35c7
AS
306 mpz_clear (s);
307 mpz_clear (n);
aec12420
SP
308}
309
eaffa762
SP
310/* Sets STRIDES to the sum of all the strides of the data references
311 accessed in LOOP at DEPTH. */
312
313static void
e262fdda 314memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
eaffa762 315{
a0bb35c7 316 if (mpz_cmp_si (loop->memory_strides, -1) == 0)
eaffa762 317 {
a0bb35c7 318 mpz_set_si (strides, 0);
eaffa762
SP
319 memory_strides_in_loop_1 (loop, depth, strides);
320 }
321 else
a0bb35c7 322 mpz_set (strides, loop->memory_strides);
eaffa762
SP
323}
324
bf69e754
SP
325/* Return true when the interchange of loops LOOP1 and LOOP2 is
326 profitable.
fb9fb290
SP
327
328 Example:
329
330 | int a[100][100];
331 |
332 | int
333 | foo (int N)
334 | {
335 | int j;
336 | int i;
337 |
338 | for (i = 0; i < N; i++)
339 | for (j = 0; j < N; j++)
340 | a[j][2 * i] += 1;
341 |
342 | return a[N][12];
343 | }
344
345 The data access A[j][i] is described like this:
346
347 | i j N a s0 s1 1
348 | 0 0 0 1 0 0 -5 = 0
349 | 0 -1 0 0 1 0 0 = 0
350 |-2 0 0 0 0 1 0 = 0
351 | 0 0 0 0 1 0 0 >= 0
352 | 0 0 0 0 0 1 0 >= 0
353 | 0 0 0 0 -1 0 100 >= 0
354 | 0 0 0 0 0 -1 100 >= 0
355
356 The linearized memory access L to A[100][100] is:
357
358 | i j N a s0 s1 1
359 | 0 0 0 0 100 1 0
360
b0c7a278
KT
361 TODO: the shown format is not valid as it does not show the fact
362 that the iteration domain "i j" is transformed using the scattering.
363
fb9fb290
SP
364 Next, to measure the impact of iterating once in loop "i", we build
365 a maximization problem: first, we add to DR accesses the dimensions
009150e1
SP
366 k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
367 L1 and L2 are the linearized memory access functions.
fb9fb290
SP
368
369 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
370 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
371 | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j
372 |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i
373 | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0
374 | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0
375 | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0
376 | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0
377 | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1
378
379 Then, we generate the polyhedron P2 by interchanging the dimensions
b0c7a278 380 (s0, s2), (s1, s3), (L1, L2), (k, i)
fb9fb290
SP
381
382 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
383 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
384 | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j
385 | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k
386 | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0
387 | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0
388 | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0
389 | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0
390 | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3
391
392 then we add to P2 the equality k = i + 1:
393
394 |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1
395
396 and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
397
b0c7a278 398 Similarly, to determine the impact of one iteration on loop "j", we
fb9fb290
SP
399 interchange (k, j), we add "k = j + 1", and we compute D2 the
400 maximal value of the difference.
401
402 Finally, the profitability test is D1 < D2: if in the outer loop
403 the strides are smaller than in the inner loop, then it is
404 profitable to interchange the loops at DEPTH1 and DEPTH2. */
2abae5f1
SP
405
406static bool
92d23680 407lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
2abae5f1 408{
e262fdda 409 mpz_t d1, d2;
2abae5f1
SP
410 bool res;
411
92d23680 412 gcc_assert (depth1 < depth2);
2abae5f1 413
a0bb35c7
AS
414 mpz_init (d1);
415 mpz_init (d2);
2abae5f1 416
92d23680
SP
417 memory_strides_in_loop (nest, depth1, d1);
418 memory_strides_in_loop (nest, depth2, d2);
2abae5f1 419
589ac63c 420 res = mpz_cmp (d1, d2) < 0;
2abae5f1 421
a0bb35c7
AS
422 mpz_clear (d1);
423 mpz_clear (d2);
2abae5f1
SP
424
425 return res;
426}
427
428/* Interchanges the loops at DEPTH1 and DEPTH2 of the original
429 scattering and assigns the resulting polyhedron to the transformed
430 scattering. */
431
432static void
b0c7a278
KT
433pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
434 poly_bb_p pbb)
2abae5f1 435{
33ad93b9
RG
436 unsigned i;
437 unsigned dim1 = psct_dynamic_dim (pbb, depth1);
438 unsigned dim2 = psct_dynamic_dim (pbb, depth2);
439 isl_space *d = isl_map_get_space (pbb->transformed);
440 isl_space *d1 = isl_space_range (d);
441 unsigned n = isl_space_dim (d1, isl_dim_out);
442 isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
443 isl_map *x = isl_map_universe (d2);
444
445 x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
446 x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
447
448 for (i = 0; i < n; i++)
449 if (i != dim1 && i != dim2)
450 x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
451
452 pbb->transformed = isl_map_apply_range (pbb->transformed, x);
2abae5f1
SP
453}
454
95baeff8
SP
455/* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
456 the statements below LST. */
457
458static void
459lst_apply_interchange (lst_p lst, int depth1, int depth2)
460{
461 if (!lst)
462 return;
463
464 if (LST_LOOP_P (lst))
465 {
466 int i;
467 lst_p l;
468
9771b263 469 FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
95baeff8
SP
470 lst_apply_interchange (l, depth1, depth2);
471 }
472 else
473 pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
474}
475
6119e7d5
SP
476/* Return true when the nest starting at LOOP1 and ending on LOOP2 is
477 perfect: i.e. there are no sequence of statements. */
478
479static bool
480lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
481{
482 if (loop1 == loop2)
483 return true;
484
485 if (!LST_LOOP_P (loop1))
486 return false;
487
9771b263
DN
488 return LST_SEQ (loop1).length () == 1
489 && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
6119e7d5
SP
490}
491
492/* Transform the loop nest between LOOP1 and LOOP2 into a perfect
493 nest. To continue the naming tradition, this function is called
7b7f2ca7
SP
494 after perfect_nestify. NEST is set to the perfectly nested loop
495 that is created. BEFORE/AFTER are set to the loops distributed
496 before/after the loop NEST. */
6119e7d5
SP
497
498static void
7b7f2ca7
SP
499lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
500 lst_p *nest, lst_p *after)
6119e7d5 501{
6119e7d5
SP
502 poly_bb_p first, last;
503
504 gcc_assert (loop1 && loop2
505 && loop1 != loop2
506 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
507
508 first = LST_PBB (lst_find_first_pbb (loop2));
509 last = LST_PBB (lst_find_last_pbb (loop2));
510
7b7f2ca7
SP
511 *before = copy_lst (loop1);
512 *nest = copy_lst (loop1);
513 *after = copy_lst (loop1);
6119e7d5 514
7b7f2ca7
SP
515 lst_remove_all_before_including_pbb (*before, first, false);
516 lst_remove_all_before_including_pbb (*after, last, true);
6119e7d5 517
7b7f2ca7
SP
518 lst_remove_all_before_excluding_pbb (*nest, first, true);
519 lst_remove_all_before_excluding_pbb (*nest, last, false);
070ba483
SP
520
521 if (lst_empty_p (*before))
556afcdc
SP
522 {
523 free_lst (*before);
524 *before = NULL;
525 }
070ba483 526 if (lst_empty_p (*after))
556afcdc
SP
527 {
528 free_lst (*after);
529 *after = NULL;
530 }
070ba483 531 if (lst_empty_p (*nest))
556afcdc
SP
532 {
533 free_lst (*nest);
534 *nest = NULL;
535 }
6119e7d5 536}
95baeff8
SP
537
538/* Try to interchange LOOP1 with LOOP2 for all the statements of the
539 body of LOOP2. LOOP1 contains LOOP2. Return true if it did the
e68c3c6c 540 interchange. */
95baeff8
SP
541
542static bool
e68c3c6c 543lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
95baeff8
SP
544{
545 int depth1 = lst_depth (loop1);
546 int depth2 = lst_depth (loop2);
7b7f2ca7
SP
547 lst_p transformed;
548
e68c3c6c 549 lst_p before = NULL, nest = NULL, after = NULL;
95baeff8 550
6119e7d5 551 if (!lst_perfectly_nested_p (loop1, loop2))
e68c3c6c 552 lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
6119e7d5 553
92d23680
SP
554 if (!lst_interchange_profitable_p (loop2, depth1, depth2))
555 return false;
556
95baeff8
SP
557 lst_apply_interchange (loop2, depth1, depth2);
558
7b7f2ca7
SP
559 /* Sync the transformed LST information and the PBB scatterings
560 before using the scatterings in the data dependence analysis. */
e68c3c6c 561 if (before || nest || after)
7b7f2ca7
SP
562 {
563 transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
e68c3c6c 564 before, nest, after);
7b7f2ca7
SP
565 lst_update_scattering (transformed);
566 free_lst (transformed);
567 }
568
95baeff8
SP
569 if (graphite_legal_transform (scop))
570 {
571 if (dump_file && (dump_flags & TDF_DETAILS))
572 fprintf (dump_file,
573 "Loops at depths %d and %d will be interchanged.\n",
574 depth1, depth2);
575
7b7f2ca7 576 /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */
e68c3c6c
SP
577 lst_insert_in_sequence (before, loop1, true);
578 lst_insert_in_sequence (after, loop1, false);
7b7f2ca7 579
e68c3c6c 580 if (nest)
7b7f2ca7 581 {
e68c3c6c 582 lst_replace (loop1, nest);
7b7f2ca7
SP
583 free_lst (loop1);
584 }
585
95baeff8
SP
586 return true;
587 }
588
589 /* Undo the transform. */
556afcdc
SP
590 free_lst (before);
591 free_lst (nest);
592 free_lst (after);
95baeff8
SP
593 lst_apply_interchange (loop2, depth2, depth1);
594 return false;
595}
596
070ba483
SP
597/* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
598 with the loop OUTER in LST_SEQ (OUTER_FATHER). */
95baeff8
SP
599
600static bool
070ba483
SP
601lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
602 lst_p inner_father)
95baeff8 603{
070ba483 604 int inner;
e68c3c6c 605 lst_p loop1, loop2;
7b7f2ca7 606
070ba483
SP
607 gcc_assert (outer_father
608 && LST_LOOP_P (outer_father)
9771b263 609 && LST_LOOP_P (LST_SEQ (outer_father)[outer])
070ba483
SP
610 && inner_father
611 && LST_LOOP_P (inner_father));
95baeff8 612
9771b263 613 loop1 = LST_SEQ (outer_father)[outer];
7b7f2ca7 614
9771b263 615 FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
e68c3c6c
SP
616 if (LST_LOOP_P (loop2)
617 && (lst_try_interchange_loops (scop, loop1, loop2)
618 || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
619 return true;
620
621 return false;
7b7f2ca7 622}
95baeff8 623
7b7f2ca7 624/* Interchanges all the loops of LOOP and the loops of its body that
cec11ec4
SP
625 are considered profitable to interchange. Return the number of
626 interchanged loops. OUTER is the index in LST_SEQ (LOOP) that
070ba483 627 points to the next outer loop to be considered for interchange. */
95baeff8 628
cec11ec4 629static int
070ba483 630lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
7b7f2ca7
SP
631{
632 lst_p l;
cec11ec4 633 int res = 0;
070ba483
SP
634 int i = 0;
635 lst_p father;
95baeff8 636
7b7f2ca7 637 if (!loop || !LST_LOOP_P (loop))
cec11ec4 638 return 0;
95baeff8 639
070ba483
SP
640 father = LST_LOOP_FATHER (loop);
641 if (father)
642 {
e68c3c6c
SP
643 while (lst_interchange_select_inner (scop, father, outer, loop))
644 {
cec11ec4 645 res++;
9771b263 646 loop = LST_SEQ (father)[outer];
e68c3c6c 647 }
070ba483
SP
648 }
649
650 if (LST_LOOP_P (loop))
9771b263 651 FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
070ba483 652 if (LST_LOOP_P (l))
cec11ec4 653 res += lst_interchange_select_outer (scop, l, i);
7b7f2ca7 654
7b7f2ca7 655 return res;
2abae5f1
SP
656}
657
cec11ec4
SP
658/* Interchanges all the loop depths that are considered profitable for
659 SCOP. Return the number of interchanged loops. */
2abae5f1 660
cec11ec4 661int
2abae5f1
SP
662scop_do_interchange (scop_p scop)
663{
cec11ec4 664 int res = lst_interchange_select_outer
070ba483 665 (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
7b7f2ca7
SP
666
667 lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
f4648ed1 668
6119e7d5 669 return res;
2abae5f1
SP
670}
671
95baeff8 672
2abae5f1
SP
673#endif
674