]>
Commit | Line | Data |
---|---|---|
2abae5f1 SP |
1 | /* Interchange heuristics and transform for loop interchange on |
2 | polyhedral representation. | |
3 | ||
23a5b65a | 4 | Copyright (C) 2009-2014 Free Software Foundation, Inc. |
2abae5f1 SP |
5 | Contributed by Sebastian Pop <sebastian.pop@amd.com> and |
6 | Harsha Jagasia <harsha.jagasia@amd.com>. | |
7 | ||
8 | This file is part of GCC. | |
9 | ||
10 | GCC is free software; you can redistribute it and/or modify | |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | GCC is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GCC; see the file COPYING3. If not see | |
22 | <http://www.gnu.org/licenses/>. */ | |
33ad93b9 | 23 | |
2abae5f1 | 24 | #include "config.h" |
33ad93b9 RG |
25 | |
26 | #ifdef HAVE_cloog | |
27 | #include <isl/aff.h> | |
28 | #include <isl/set.h> | |
29 | #include <isl/map.h> | |
30 | #include <isl/union_map.h> | |
31 | #include <isl/ilp.h> | |
32 | #include <cloog/cloog.h> | |
33 | #include <cloog/isl/domain.h> | |
34 | #endif | |
35 | ||
2abae5f1 SP |
36 | #include "system.h" |
37 | #include "coretypes.h" | |
4d648807 | 38 | #include "tree.h" |
2fb9a547 AM |
39 | #include "basic-block.h" |
40 | #include "tree-ssa-alias.h" | |
41 | #include "internal-fn.h" | |
42 | #include "gimple-expr.h" | |
43 | #include "is-a.h" | |
442b4905 | 44 | #include "gimple.h" |
5be5c238 | 45 | #include "gimple-iterator.h" |
442b4905 | 46 | #include "tree-ssa-loop.h" |
7ee2468b | 47 | #include "dumpfile.h" |
2abae5f1 SP |
48 | #include "cfgloop.h" |
49 | #include "tree-chrec.h" | |
50 | #include "tree-data-ref.h" | |
51 | #include "tree-scalar-evolution.h" | |
1bd6497c | 52 | #include "sese.h" |
2abae5f1 SP |
53 | |
54 | #ifdef HAVE_cloog | |
2abae5f1 SP |
55 | #include "graphite-poly.h" |
56 | ||
33ad93b9 | 57 | /* XXX isl rewrite following comment */ |
fb9fb290 SP |
58 | /* Builds a linear expression, of dimension DIM, representing PDR's |
59 | memory access: | |
60 | ||
61 | L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}. | |
62 | ||
63 | For an array A[10][20] with two subscript locations s0 and s1, the | |
64 | linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0 | |
b0c7a278 KT |
65 | corresponds to a memory stride of 20. |
66 | ||
67 | OFFSET is a number of dimensions to prepend before the | |
68 | subscript dimensions: s_0, s_1, ..., s_n. | |
69 | ||
70 | Thus, the final linear expression has the following format: | |
71 | 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n | |
72 | where the expression itself is: | |
73 | c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */ | |
fb9fb290 | 74 | |
33ad93b9 RG |
75 | static isl_constraint * |
76 | build_linearized_memory_access (isl_map *map, poly_dr_p pdr) | |
fb9fb290 | 77 | { |
33ad93b9 RG |
78 | isl_constraint *res; |
79 | isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map)); | |
80 | unsigned offset, nsubs; | |
81 | int i; | |
82 | isl_int size, subsize; | |
83 | ||
84 | res = isl_equality_alloc (ls); | |
85 | isl_int_init (size); | |
86 | isl_int_set_ui (size, 1); | |
87 | isl_int_init (subsize); | |
88 | isl_int_set_ui (subsize, 1); | |
89 | ||
90 | nsubs = isl_set_dim (pdr->extent, isl_dim_set); | |
91 | /* -1 for the already included L dimension. */ | |
92 | offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs; | |
93 | res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1); | |
94 | /* Go through all subscripts from last to first. First dimension | |
95 | is the alias set, ignore it. */ | |
96 | for (i = nsubs - 1; i >= 1; i--) | |
fb9fb290 | 97 | { |
33ad93b9 RG |
98 | isl_space *dc; |
99 | isl_aff *aff; | |
100 | ||
101 | res = isl_constraint_set_coefficient (res, isl_dim_out, offset + i, size); | |
fb9fb290 | 102 | |
33ad93b9 RG |
103 | dc = isl_set_get_space (pdr->extent); |
104 | aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc)); | |
105 | aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1); | |
106 | isl_set_max (pdr->extent, aff, &subsize); | |
107 | isl_aff_free (aff); | |
108 | isl_int_mul (size, size, subsize); | |
fb9fb290 SP |
109 | } |
110 | ||
33ad93b9 RG |
111 | isl_int_clear (subsize); |
112 | isl_int_clear (size); | |
113 | ||
fb9fb290 SP |
114 | return res; |
115 | } | |
116 | ||
33ad93b9 RG |
117 | /* Set STRIDE to the stride of PDR in memory by advancing by one in |
118 | the loop at DEPTH. */ | |
2bc529bf KT |
119 | |
120 | static void | |
33ad93b9 | 121 | pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr) |
2bc529bf | 122 | { |
33ad93b9 RG |
123 | poly_bb_p pbb = PDR_PBB (pdr); |
124 | isl_map *map; | |
125 | isl_set *set; | |
126 | isl_aff *aff; | |
127 | isl_space *dc; | |
128 | isl_constraint *lma, *c; | |
129 | isl_int islstride; | |
130 | graphite_dim_t time_depth; | |
131 | unsigned offset, nt; | |
132 | unsigned i; | |
133 | /* XXX isl rewrite following comments. */ | |
134 | /* Builds a partial difference equations and inserts them | |
135 | into pointset powerset polyhedron P. Polyhedron is assumed | |
136 | to have the format: T|I|T'|I'|G|S|S'|l1|l2. | |
137 | ||
138 | TIME_DEPTH is the time dimension w.r.t. which we are | |
139 | differentiating. | |
140 | OFFSET represents the number of dimensions between | |
141 | columns t_{time_depth} and t'_{time_depth}. | |
142 | DIM_SCTR is the number of scattering dimensions. It is | |
143 | essentially the dimensionality of the T vector. | |
144 | ||
145 | The following equations are inserted into the polyhedron P: | |
146 | | t_1 = t_1' | |
147 | | ... | |
148 | | t_{time_depth-1} = t'_{time_depth-1} | |
149 | | t_{time_depth} = t'_{time_depth} + 1 | |
150 | | t_{time_depth+1} = t'_{time_depth + 1} | |
151 | | ... | |
152 | | t_{dim_sctr} = t'_{dim_sctr}. */ | |
2bc529bf KT |
153 | |
154 | /* Add the equality: t_{time_depth} = t'_{time_depth} + 1. | |
155 | This is the core part of this alogrithm, since this | |
156 | constraint asks for the memory access stride (difference) | |
157 | between two consecutive points in time dimensions. */ | |
158 | ||
2bc529bf KT |
159 | /* Add equalities: |
160 | | t1 = t1' | |
161 | | ... | |
162 | | t_{time_depth-1} = t'_{time_depth-1} | |
163 | | t_{time_depth+1} = t'_{time_depth+1} | |
164 | | ... | |
165 | | t_{dim_sctr} = t'_{dim_sctr} | |
166 | ||
167 | This means that all the time dimensions are equal except for | |
168 | time_depth, where the constraint is t_{depth} = t'_{depth} + 1 | |
073a8998 | 169 | step. More to this: we should be careful not to add equalities |
2bc529bf KT |
170 | to the 'coupled' dimensions, which happens when the one dimension |
171 | is stripmined dimension, and the other dimension corresponds | |
172 | to the point loop inside stripmined dimension. */ | |
173 | ||
33ad93b9 RG |
174 | /* pdr->accesses: [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript] |
175 | ??? [P] not used for PDRs? | |
176 | pdr->extent: [a,S1..nb_subscript] | |
177 | pbb->domain: [P1..nb_param,I1..nb_domain] | |
178 | pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr] | |
179 | [T] includes local vars (currently unused) | |
180 | ||
181 | First we create [P,I] -> [T,a,S]. */ | |
182 | ||
183 | map = isl_map_flat_range_product (isl_map_copy (pbb->transformed), | |
184 | isl_map_copy (pdr->accesses)); | |
185 | /* Add a dimension for L: [P,I] -> [T,a,S,L].*/ | |
186 | map = isl_map_add_dims (map, isl_dim_out, 1); | |
187 | /* Build a constraint for "lma[S] - L == 0", effectively calculating | |
188 | L in terms of subscripts. */ | |
189 | lma = build_linearized_memory_access (map, pdr); | |
190 | /* And add it to the map, so we now have: | |
191 | [P,I] -> [T,a,S,L] : lma([S]) == L. */ | |
192 | map = isl_map_add_constraint (map, lma); | |
193 | ||
194 | /* Then we create [P,I,P',I'] -> [T,a,S,L,T',a',S',L']. */ | |
195 | map = isl_map_flat_product (map, isl_map_copy (map)); | |
196 | ||
197 | /* Now add the equality T[time_depth] == T'[time_depth]+1. This will | |
198 | force L' to be the linear address at T[time_depth] + 1. */ | |
199 | time_depth = psct_dynamic_dim (pbb, depth); | |
200 | /* Length of [a,S] plus [L] ... */ | |
201 | offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out); | |
202 | /* ... plus [T]. */ | |
203 | offset += isl_map_dim (pbb->transformed, isl_dim_out); | |
204 | ||
205 | c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map))); | |
206 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1); | |
207 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, | |
208 | offset + time_depth, -1); | |
209 | c = isl_constraint_set_constant_si (c, 1); | |
210 | map = isl_map_add_constraint (map, c); | |
211 | ||
212 | /* Now we equate most of the T/T' elements (making PITaSL nearly | |
213 | the same is (PITaSL)', except for one dimension, namely for 'depth' | |
214 | (an index into [I]), after translating to index into [T]. Take care | |
215 | to not produce an empty map, which indicates we wanted to equate | |
216 | two dimensions that are already coupled via the above time_depth | |
217 | dimension. Happens with strip mining where several scatter dimension | |
218 | are interdependend. */ | |
219 | /* Length of [T]. */ | |
220 | nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb); | |
221 | for (i = 0; i < nt; i++) | |
2bc529bf KT |
222 | if (i != time_depth) |
223 | { | |
33ad93b9 RG |
224 | isl_map *temp = isl_map_equate (isl_map_copy (map), |
225 | isl_dim_out, i, | |
226 | isl_dim_out, offset + i); | |
227 | if (isl_map_is_empty (temp)) | |
228 | isl_map_free (temp); | |
229 | else | |
230 | { | |
231 | isl_map_free (map); | |
232 | map = temp; | |
233 | } | |
2bc529bf KT |
234 | } |
235 | ||
33ad93b9 RG |
236 | /* Now maximize the expression L' - L. */ |
237 | set = isl_map_range (map); | |
238 | dc = isl_set_get_space (set); | |
239 | aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc)); | |
240 | aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1); | |
241 | aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1); | |
242 | isl_int_init (islstride); | |
243 | isl_set_max (set, aff, &islstride); | |
244 | isl_int_get_gmp (islstride, stride); | |
245 | isl_int_clear (islstride); | |
246 | isl_aff_free (aff); | |
247 | isl_set_free (set); | |
67255edf | 248 | |
2bc529bf KT |
249 | if (dump_file && (dump_flags & TDF_DETAILS)) |
250 | { | |
8ac16127 MG |
251 | gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d: %Zd ", |
252 | pbb_index (pbb), PDR_ID (pdr), (int) depth, stride); | |
2bc529bf | 253 | } |
2abae5f1 SP |
254 | } |
255 | ||
bf69e754 SP |
256 | /* Sets STRIDES to the sum of all the strides of the data references |
257 | accessed in LOOP at DEPTH. */ | |
aec12420 SP |
258 | |
259 | static void | |
e262fdda | 260 | memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides) |
aec12420 | 261 | { |
bf69e754 SP |
262 | int i, j; |
263 | lst_p l; | |
aec12420 | 264 | poly_dr_p pdr; |
e262fdda | 265 | mpz_t s, n; |
aec12420 | 266 | |
a0bb35c7 AS |
267 | mpz_init (s); |
268 | mpz_init (n); | |
aec12420 | 269 | |
9771b263 | 270 | FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l) |
bf69e754 | 271 | if (LST_LOOP_P (l)) |
eaffa762 | 272 | memory_strides_in_loop_1 (l, depth, strides); |
bf69e754 | 273 | else |
9771b263 | 274 | FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr) |
bf69e754 | 275 | { |
eaffa762 | 276 | pdr_stride_in_loop (s, depth, pdr); |
a0bb35c7 AS |
277 | mpz_set_si (n, PDR_NB_REFS (pdr)); |
278 | mpz_mul (s, s, n); | |
279 | mpz_add (strides, strides, s); | |
bf69e754 | 280 | } |
aec12420 | 281 | |
a0bb35c7 AS |
282 | mpz_clear (s); |
283 | mpz_clear (n); | |
aec12420 SP |
284 | } |
285 | ||
eaffa762 SP |
286 | /* Sets STRIDES to the sum of all the strides of the data references |
287 | accessed in LOOP at DEPTH. */ | |
288 | ||
289 | static void | |
e262fdda | 290 | memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides) |
eaffa762 | 291 | { |
a0bb35c7 | 292 | if (mpz_cmp_si (loop->memory_strides, -1) == 0) |
eaffa762 | 293 | { |
a0bb35c7 | 294 | mpz_set_si (strides, 0); |
eaffa762 SP |
295 | memory_strides_in_loop_1 (loop, depth, strides); |
296 | } | |
297 | else | |
a0bb35c7 | 298 | mpz_set (strides, loop->memory_strides); |
eaffa762 SP |
299 | } |
300 | ||
bf69e754 SP |
301 | /* Return true when the interchange of loops LOOP1 and LOOP2 is |
302 | profitable. | |
fb9fb290 SP |
303 | |
304 | Example: | |
305 | ||
306 | | int a[100][100]; | |
307 | | | |
308 | | int | |
309 | | foo (int N) | |
310 | | { | |
311 | | int j; | |
312 | | int i; | |
313 | | | |
314 | | for (i = 0; i < N; i++) | |
315 | | for (j = 0; j < N; j++) | |
316 | | a[j][2 * i] += 1; | |
317 | | | |
318 | | return a[N][12]; | |
319 | | } | |
320 | ||
321 | The data access A[j][i] is described like this: | |
322 | ||
323 | | i j N a s0 s1 1 | |
324 | | 0 0 0 1 0 0 -5 = 0 | |
325 | | 0 -1 0 0 1 0 0 = 0 | |
326 | |-2 0 0 0 0 1 0 = 0 | |
327 | | 0 0 0 0 1 0 0 >= 0 | |
328 | | 0 0 0 0 0 1 0 >= 0 | |
329 | | 0 0 0 0 -1 0 100 >= 0 | |
330 | | 0 0 0 0 0 -1 100 >= 0 | |
331 | ||
332 | The linearized memory access L to A[100][100] is: | |
333 | ||
334 | | i j N a s0 s1 1 | |
335 | | 0 0 0 0 100 1 0 | |
336 | ||
b0c7a278 KT |
337 | TODO: the shown format is not valid as it does not show the fact |
338 | that the iteration domain "i j" is transformed using the scattering. | |
339 | ||
fb9fb290 SP |
340 | Next, to measure the impact of iterating once in loop "i", we build |
341 | a maximization problem: first, we add to DR accesses the dimensions | |
009150e1 SP |
342 | k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1. |
343 | L1 and L2 are the linearized memory access functions. | |
fb9fb290 SP |
344 | |
345 | | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
346 | | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
347 | | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j | |
348 | |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i | |
349 | | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0 | |
350 | | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0 | |
351 | | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0 | |
352 | | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0 | |
353 | | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1 | |
354 | ||
355 | Then, we generate the polyhedron P2 by interchanging the dimensions | |
b0c7a278 | 356 | (s0, s2), (s1, s3), (L1, L2), (k, i) |
fb9fb290 SP |
357 | |
358 | | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
359 | | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
360 | | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j | |
361 | | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k | |
362 | | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0 | |
363 | | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0 | |
364 | | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0 | |
365 | | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0 | |
366 | | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3 | |
367 | ||
368 | then we add to P2 the equality k = i + 1: | |
369 | ||
370 | |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1 | |
371 | ||
372 | and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)". | |
373 | ||
b0c7a278 | 374 | Similarly, to determine the impact of one iteration on loop "j", we |
fb9fb290 SP |
375 | interchange (k, j), we add "k = j + 1", and we compute D2 the |
376 | maximal value of the difference. | |
377 | ||
378 | Finally, the profitability test is D1 < D2: if in the outer loop | |
379 | the strides are smaller than in the inner loop, then it is | |
380 | profitable to interchange the loops at DEPTH1 and DEPTH2. */ | |
2abae5f1 SP |
381 | |
382 | static bool | |
92d23680 | 383 | lst_interchange_profitable_p (lst_p nest, int depth1, int depth2) |
2abae5f1 | 384 | { |
e262fdda | 385 | mpz_t d1, d2; |
2abae5f1 SP |
386 | bool res; |
387 | ||
92d23680 | 388 | gcc_assert (depth1 < depth2); |
2abae5f1 | 389 | |
a0bb35c7 AS |
390 | mpz_init (d1); |
391 | mpz_init (d2); | |
2abae5f1 | 392 | |
92d23680 SP |
393 | memory_strides_in_loop (nest, depth1, d1); |
394 | memory_strides_in_loop (nest, depth2, d2); | |
2abae5f1 | 395 | |
589ac63c | 396 | res = mpz_cmp (d1, d2) < 0; |
2abae5f1 | 397 | |
a0bb35c7 AS |
398 | mpz_clear (d1); |
399 | mpz_clear (d2); | |
2abae5f1 SP |
400 | |
401 | return res; | |
402 | } | |
403 | ||
404 | /* Interchanges the loops at DEPTH1 and DEPTH2 of the original | |
405 | scattering and assigns the resulting polyhedron to the transformed | |
406 | scattering. */ | |
407 | ||
408 | static void | |
b0c7a278 KT |
409 | pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2, |
410 | poly_bb_p pbb) | |
2abae5f1 | 411 | { |
33ad93b9 RG |
412 | unsigned i; |
413 | unsigned dim1 = psct_dynamic_dim (pbb, depth1); | |
414 | unsigned dim2 = psct_dynamic_dim (pbb, depth2); | |
415 | isl_space *d = isl_map_get_space (pbb->transformed); | |
416 | isl_space *d1 = isl_space_range (d); | |
417 | unsigned n = isl_space_dim (d1, isl_dim_out); | |
418 | isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n); | |
419 | isl_map *x = isl_map_universe (d2); | |
420 | ||
421 | x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2); | |
422 | x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1); | |
423 | ||
424 | for (i = 0; i < n; i++) | |
425 | if (i != dim1 && i != dim2) | |
426 | x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i); | |
427 | ||
428 | pbb->transformed = isl_map_apply_range (pbb->transformed, x); | |
2abae5f1 SP |
429 | } |
430 | ||
95baeff8 SP |
431 | /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all |
432 | the statements below LST. */ | |
433 | ||
434 | static void | |
435 | lst_apply_interchange (lst_p lst, int depth1, int depth2) | |
436 | { | |
437 | if (!lst) | |
438 | return; | |
439 | ||
440 | if (LST_LOOP_P (lst)) | |
441 | { | |
442 | int i; | |
443 | lst_p l; | |
444 | ||
9771b263 | 445 | FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l) |
95baeff8 SP |
446 | lst_apply_interchange (l, depth1, depth2); |
447 | } | |
448 | else | |
449 | pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst)); | |
450 | } | |
451 | ||
6119e7d5 SP |
452 | /* Return true when the nest starting at LOOP1 and ending on LOOP2 is |
453 | perfect: i.e. there are no sequence of statements. */ | |
454 | ||
455 | static bool | |
456 | lst_perfectly_nested_p (lst_p loop1, lst_p loop2) | |
457 | { | |
458 | if (loop1 == loop2) | |
459 | return true; | |
460 | ||
461 | if (!LST_LOOP_P (loop1)) | |
462 | return false; | |
463 | ||
9771b263 DN |
464 | return LST_SEQ (loop1).length () == 1 |
465 | && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2); | |
6119e7d5 SP |
466 | } |
467 | ||
468 | /* Transform the loop nest between LOOP1 and LOOP2 into a perfect | |
469 | nest. To continue the naming tradition, this function is called | |
7b7f2ca7 SP |
470 | after perfect_nestify. NEST is set to the perfectly nested loop |
471 | that is created. BEFORE/AFTER are set to the loops distributed | |
472 | before/after the loop NEST. */ | |
6119e7d5 SP |
473 | |
474 | static void | |
7b7f2ca7 SP |
475 | lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before, |
476 | lst_p *nest, lst_p *after) | |
6119e7d5 | 477 | { |
6119e7d5 SP |
478 | poly_bb_p first, last; |
479 | ||
480 | gcc_assert (loop1 && loop2 | |
481 | && loop1 != loop2 | |
482 | && LST_LOOP_P (loop1) && LST_LOOP_P (loop2)); | |
483 | ||
484 | first = LST_PBB (lst_find_first_pbb (loop2)); | |
485 | last = LST_PBB (lst_find_last_pbb (loop2)); | |
486 | ||
7b7f2ca7 SP |
487 | *before = copy_lst (loop1); |
488 | *nest = copy_lst (loop1); | |
489 | *after = copy_lst (loop1); | |
6119e7d5 | 490 | |
7b7f2ca7 SP |
491 | lst_remove_all_before_including_pbb (*before, first, false); |
492 | lst_remove_all_before_including_pbb (*after, last, true); | |
6119e7d5 | 493 | |
7b7f2ca7 SP |
494 | lst_remove_all_before_excluding_pbb (*nest, first, true); |
495 | lst_remove_all_before_excluding_pbb (*nest, last, false); | |
070ba483 SP |
496 | |
497 | if (lst_empty_p (*before)) | |
556afcdc SP |
498 | { |
499 | free_lst (*before); | |
500 | *before = NULL; | |
501 | } | |
070ba483 | 502 | if (lst_empty_p (*after)) |
556afcdc SP |
503 | { |
504 | free_lst (*after); | |
505 | *after = NULL; | |
506 | } | |
070ba483 | 507 | if (lst_empty_p (*nest)) |
556afcdc SP |
508 | { |
509 | free_lst (*nest); | |
510 | *nest = NULL; | |
511 | } | |
6119e7d5 | 512 | } |
95baeff8 SP |
513 | |
514 | /* Try to interchange LOOP1 with LOOP2 for all the statements of the | |
515 | body of LOOP2. LOOP1 contains LOOP2. Return true if it did the | |
e68c3c6c | 516 | interchange. */ |
95baeff8 SP |
517 | |
518 | static bool | |
e68c3c6c | 519 | lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2) |
95baeff8 SP |
520 | { |
521 | int depth1 = lst_depth (loop1); | |
522 | int depth2 = lst_depth (loop2); | |
7b7f2ca7 SP |
523 | lst_p transformed; |
524 | ||
e68c3c6c | 525 | lst_p before = NULL, nest = NULL, after = NULL; |
95baeff8 | 526 | |
6119e7d5 | 527 | if (!lst_perfectly_nested_p (loop1, loop2)) |
e68c3c6c | 528 | lst_perfect_nestify (loop1, loop2, &before, &nest, &after); |
6119e7d5 | 529 | |
92d23680 SP |
530 | if (!lst_interchange_profitable_p (loop2, depth1, depth2)) |
531 | return false; | |
532 | ||
95baeff8 SP |
533 | lst_apply_interchange (loop2, depth1, depth2); |
534 | ||
7b7f2ca7 SP |
535 | /* Sync the transformed LST information and the PBB scatterings |
536 | before using the scatterings in the data dependence analysis. */ | |
e68c3c6c | 537 | if (before || nest || after) |
7b7f2ca7 SP |
538 | { |
539 | transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1, | |
e68c3c6c | 540 | before, nest, after); |
7b7f2ca7 SP |
541 | lst_update_scattering (transformed); |
542 | free_lst (transformed); | |
543 | } | |
544 | ||
95baeff8 SP |
545 | if (graphite_legal_transform (scop)) |
546 | { | |
547 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
548 | fprintf (dump_file, | |
549 | "Loops at depths %d and %d will be interchanged.\n", | |
550 | depth1, depth2); | |
551 | ||
7b7f2ca7 | 552 | /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */ |
e68c3c6c SP |
553 | lst_insert_in_sequence (before, loop1, true); |
554 | lst_insert_in_sequence (after, loop1, false); | |
7b7f2ca7 | 555 | |
e68c3c6c | 556 | if (nest) |
7b7f2ca7 | 557 | { |
e68c3c6c | 558 | lst_replace (loop1, nest); |
7b7f2ca7 SP |
559 | free_lst (loop1); |
560 | } | |
561 | ||
95baeff8 SP |
562 | return true; |
563 | } | |
564 | ||
565 | /* Undo the transform. */ | |
556afcdc SP |
566 | free_lst (before); |
567 | free_lst (nest); | |
568 | free_lst (after); | |
95baeff8 SP |
569 | lst_apply_interchange (loop2, depth2, depth1); |
570 | return false; | |
571 | } | |
572 | ||
070ba483 SP |
573 | /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged |
574 | with the loop OUTER in LST_SEQ (OUTER_FATHER). */ | |
95baeff8 SP |
575 | |
576 | static bool | |
070ba483 SP |
577 | lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer, |
578 | lst_p inner_father) | |
95baeff8 | 579 | { |
070ba483 | 580 | int inner; |
e68c3c6c | 581 | lst_p loop1, loop2; |
7b7f2ca7 | 582 | |
070ba483 SP |
583 | gcc_assert (outer_father |
584 | && LST_LOOP_P (outer_father) | |
9771b263 | 585 | && LST_LOOP_P (LST_SEQ (outer_father)[outer]) |
070ba483 SP |
586 | && inner_father |
587 | && LST_LOOP_P (inner_father)); | |
95baeff8 | 588 | |
9771b263 | 589 | loop1 = LST_SEQ (outer_father)[outer]; |
7b7f2ca7 | 590 | |
9771b263 | 591 | FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2) |
e68c3c6c SP |
592 | if (LST_LOOP_P (loop2) |
593 | && (lst_try_interchange_loops (scop, loop1, loop2) | |
594 | || lst_interchange_select_inner (scop, outer_father, outer, loop2))) | |
595 | return true; | |
596 | ||
597 | return false; | |
7b7f2ca7 | 598 | } |
95baeff8 | 599 | |
7b7f2ca7 | 600 | /* Interchanges all the loops of LOOP and the loops of its body that |
cec11ec4 SP |
601 | are considered profitable to interchange. Return the number of |
602 | interchanged loops. OUTER is the index in LST_SEQ (LOOP) that | |
070ba483 | 603 | points to the next outer loop to be considered for interchange. */ |
95baeff8 | 604 | |
cec11ec4 | 605 | static int |
070ba483 | 606 | lst_interchange_select_outer (scop_p scop, lst_p loop, int outer) |
7b7f2ca7 SP |
607 | { |
608 | lst_p l; | |
cec11ec4 | 609 | int res = 0; |
070ba483 SP |
610 | int i = 0; |
611 | lst_p father; | |
95baeff8 | 612 | |
7b7f2ca7 | 613 | if (!loop || !LST_LOOP_P (loop)) |
cec11ec4 | 614 | return 0; |
95baeff8 | 615 | |
070ba483 SP |
616 | father = LST_LOOP_FATHER (loop); |
617 | if (father) | |
618 | { | |
e68c3c6c SP |
619 | while (lst_interchange_select_inner (scop, father, outer, loop)) |
620 | { | |
cec11ec4 | 621 | res++; |
9771b263 | 622 | loop = LST_SEQ (father)[outer]; |
e68c3c6c | 623 | } |
070ba483 SP |
624 | } |
625 | ||
626 | if (LST_LOOP_P (loop)) | |
9771b263 | 627 | FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l) |
070ba483 | 628 | if (LST_LOOP_P (l)) |
cec11ec4 | 629 | res += lst_interchange_select_outer (scop, l, i); |
7b7f2ca7 | 630 | |
7b7f2ca7 | 631 | return res; |
2abae5f1 SP |
632 | } |
633 | ||
cec11ec4 SP |
634 | /* Interchanges all the loop depths that are considered profitable for |
635 | SCOP. Return the number of interchanged loops. */ | |
2abae5f1 | 636 | |
cec11ec4 | 637 | int |
2abae5f1 SP |
638 | scop_do_interchange (scop_p scop) |
639 | { | |
cec11ec4 | 640 | int res = lst_interchange_select_outer |
070ba483 | 641 | (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0); |
7b7f2ca7 SP |
642 | |
643 | lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop)); | |
f4648ed1 | 644 | |
6119e7d5 | 645 | return res; |
2abae5f1 SP |
646 | } |
647 | ||
95baeff8 | 648 | |
2abae5f1 SP |
649 | #endif |
650 |