]>
Commit | Line | Data |
---|---|---|
c6bb733d | 1 | /* Interchange heuristics and transform for loop interchange on |
2 | polyhedral representation. | |
3 | ||
4 | Copyright (C) 2009 Free Software Foundation, Inc. | |
5 | Contributed by Sebastian Pop <sebastian.pop@amd.com> and | |
6 | Harsha Jagasia <harsha.jagasia@amd.com>. | |
7 | ||
8 | This file is part of GCC. | |
9 | ||
10 | GCC is free software; you can redistribute it and/or modify | |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | GCC is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GCC; see the file COPYING3. If not see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | #include "config.h" | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "tm.h" | |
27 | #include "ggc.h" | |
28 | #include "tree.h" | |
29 | #include "rtl.h" | |
30 | #include "output.h" | |
31 | #include "basic-block.h" | |
32 | #include "diagnostic.h" | |
33 | #include "tree-flow.h" | |
34 | #include "toplev.h" | |
35 | #include "tree-dump.h" | |
36 | #include "timevar.h" | |
37 | #include "cfgloop.h" | |
38 | #include "tree-chrec.h" | |
39 | #include "tree-data-ref.h" | |
40 | #include "tree-scalar-evolution.h" | |
41 | #include "tree-pass.h" | |
42 | #include "domwalk.h" | |
43 | #include "value-prof.h" | |
44 | #include "pointer-set.h" | |
45 | #include "gimple.h" | |
46 | #include "params.h" | |
47 | ||
48 | #ifdef HAVE_cloog | |
c6bb733d | 49 | #include "ppl_c.h" |
50 | #include "sese.h" | |
51 | #include "graphite-ppl.h" | |
52 | #include "graphite.h" | |
53 | #include "graphite-poly.h" | |
54 | ||
80020e9d | 55 | /* Builds a linear expression, of dimension DIM, representing PDR's |
56 | memory access: | |
57 | ||
58 | L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}. | |
59 | ||
60 | For an array A[10][20] with two subscript locations s0 and s1, the | |
61 | linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0 | |
5e18ab2b | 62 | corresponds to a memory stride of 20. |
63 | ||
64 | OFFSET is a number of dimensions to prepend before the | |
65 | subscript dimensions: s_0, s_1, ..., s_n. | |
66 | ||
67 | Thus, the final linear expression has the following format: | |
68 | 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n | |
69 | where the expression itself is: | |
70 | c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */ | |
80020e9d | 71 | |
72 | static ppl_Linear_Expression_t | |
5e18ab2b | 73 | build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr) |
80020e9d | 74 | { |
75 | ppl_Linear_Expression_t res; | |
76 | ppl_Linear_Expression_t le; | |
77 | ppl_dimension_type i; | |
78 | ppl_dimension_type first = pdr_subscript_dim (pdr, 0); | |
79 | ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr)); | |
0ef84e3b | 80 | mpz_t size, sub_size; |
5e18ab2b | 81 | graphite_dim_t dim = offset + pdr_dim (pdr); |
80020e9d | 82 | |
83 | ppl_new_Linear_Expression_with_dimension (&res, dim); | |
84 | ||
2d6fe479 | 85 | mpz_init (size); |
86 | mpz_set_si (size, 1); | |
87 | mpz_init (sub_size); | |
88 | mpz_set_si (sub_size, 1); | |
80020e9d | 89 | |
90 | for (i = last - 1; i >= first; i--) | |
91 | { | |
5e18ab2b | 92 | ppl_set_coef_gmp (res, i + offset, size); |
80020e9d | 93 | |
5e18ab2b | 94 | ppl_new_Linear_Expression_with_dimension (&le, dim - offset); |
80020e9d | 95 | ppl_set_coef (le, i, 1); |
55c54afb | 96 | ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size); |
2d6fe479 | 97 | mpz_mul (size, size, sub_size); |
80020e9d | 98 | ppl_delete_Linear_Expression (le); |
99 | } | |
100 | ||
2d6fe479 | 101 | mpz_clear (sub_size); |
102 | mpz_clear (size); | |
80020e9d | 103 | return res; |
104 | } | |
105 | ||
3bcc3204 | 106 | /* Builds a partial difference equations and inserts them |
107 | into pointset powerset polyhedron P. Polyhedron is assumed | |
108 | to have the format: T|I|T'|I'|G|S|S'|l1|l2. | |
109 | ||
110 | TIME_DEPTH is the time dimension w.r.t. which we are | |
111 | differentiating. | |
112 | OFFSET represents the number of dimensions between | |
113 | columns t_{time_depth} and t'_{time_depth}. | |
114 | DIM_SCTR is the number of scattering dimensions. It is | |
115 | essentially the dimensionality of the T vector. | |
116 | ||
117 | The following equations are inserted into the polyhedron P: | |
118 | | t_1 = t_1' | |
119 | | ... | |
120 | | t_{time_depth-1} = t'_{time_depth-1} | |
121 | | t_{time_depth} = t'_{time_depth} + 1 | |
122 | | t_{time_depth+1} = t'_{time_depth + 1} | |
123 | | ... | |
124 | | t_{dim_sctr} = t'_{dim_sctr}. */ | |
125 | ||
126 | static void | |
127 | build_partial_difference (ppl_Pointset_Powerset_C_Polyhedron_t *p, | |
128 | ppl_dimension_type time_depth, | |
129 | ppl_dimension_type offset, | |
130 | ppl_dimension_type dim_sctr) | |
131 | { | |
132 | ppl_Constraint_t new_cstr; | |
133 | ppl_Linear_Expression_t le; | |
134 | ppl_dimension_type i; | |
135 | ppl_dimension_type dim; | |
136 | ppl_Pointset_Powerset_C_Polyhedron_t temp; | |
137 | ||
138 | /* Add the equality: t_{time_depth} = t'_{time_depth} + 1. | |
139 | This is the core part of this alogrithm, since this | |
140 | constraint asks for the memory access stride (difference) | |
141 | between two consecutive points in time dimensions. */ | |
142 | ||
143 | ppl_Pointset_Powerset_C_Polyhedron_space_dimension (*p, &dim); | |
144 | ppl_new_Linear_Expression_with_dimension (&le, dim); | |
145 | ppl_set_coef (le, time_depth, 1); | |
146 | ppl_set_coef (le, time_depth + offset, -1); | |
147 | ppl_set_inhomogeneous (le, 1); | |
148 | ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL); | |
149 | ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr); | |
150 | ppl_delete_Linear_Expression (le); | |
151 | ppl_delete_Constraint (new_cstr); | |
152 | ||
153 | /* Add equalities: | |
154 | | t1 = t1' | |
155 | | ... | |
156 | | t_{time_depth-1} = t'_{time_depth-1} | |
157 | | t_{time_depth+1} = t'_{time_depth+1} | |
158 | | ... | |
159 | | t_{dim_sctr} = t'_{dim_sctr} | |
160 | ||
161 | This means that all the time dimensions are equal except for | |
162 | time_depth, where the constraint is t_{depth} = t'_{depth} + 1 | |
163 | step. More to this: we should be carefull not to add equalities | |
164 | to the 'coupled' dimensions, which happens when the one dimension | |
165 | is stripmined dimension, and the other dimension corresponds | |
166 | to the point loop inside stripmined dimension. */ | |
167 | ||
168 | ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p); | |
169 | ||
170 | for (i = 0; i < dim_sctr; i++) | |
171 | if (i != time_depth) | |
172 | { | |
173 | ppl_new_Linear_Expression_with_dimension (&le, dim); | |
174 | ppl_set_coef (le, i, 1); | |
175 | ppl_set_coef (le, i + offset, -1); | |
176 | ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL); | |
177 | ppl_Pointset_Powerset_C_Polyhedron_add_constraint (temp, new_cstr); | |
178 | ||
179 | if (ppl_Pointset_Powerset_C_Polyhedron_is_empty (temp)) | |
180 | { | |
181 | ppl_delete_Pointset_Powerset_C_Polyhedron (temp); | |
182 | ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p); | |
183 | } | |
184 | else | |
185 | ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr); | |
186 | ppl_delete_Linear_Expression (le); | |
187 | ppl_delete_Constraint (new_cstr); | |
188 | } | |
189 | ||
190 | ppl_delete_Pointset_Powerset_C_Polyhedron (temp); | |
191 | } | |
192 | ||
193 | ||
80020e9d | 194 | /* Set STRIDE to the stride of PDR in memory by advancing by one in |
acba5819 | 195 | the loop at DEPTH. */ |
80020e9d | 196 | |
197 | static void | |
0ef84e3b | 198 | pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr) |
80020e9d | 199 | { |
a1190c1b | 200 | ppl_dimension_type time_depth; |
80020e9d | 201 | ppl_Linear_Expression_t le, lma; |
202 | ppl_Constraint_t new_cstr; | |
80020e9d | 203 | ppl_dimension_type i, *map; |
5e18ab2b | 204 | ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr; |
205 | graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1; | |
206 | poly_bb_p pbb = PDR_PBB (pdr); | |
207 | ppl_dimension_type offset = pbb_nb_scattering_transform (pbb) | |
208 | + pbb_nb_local_vars (pbb) | |
209 | + pbb_dim_iter_domain (pbb); | |
210 | ppl_dimension_type offsetg = offset + pbb_nb_params (pbb); | |
211 | ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb) | |
212 | + pbb_nb_local_vars (pbb); | |
213 | ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts; | |
214 | ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1; | |
215 | ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2; | |
216 | ||
217 | /* The resulting polyhedron should have the following format: | |
218 | T|I|T'|I'|G|S|S'|l1|l2 | |
219 | where: | |
220 | | T = t_1..t_{dim_sctr} | |
221 | | I = i_1..i_{dim_iter_domain} | |
222 | | T'= t'_1..t'_{dim_sctr} | |
223 | | I'= i'_1..i'_{dim_iter_domain} | |
224 | | G = g_1..g_{nb_params} | |
225 | | S = s_1..s_{nb_subscripts} | |
226 | | S'= s'_1..s'_{nb_subscripts} | |
227 | | l1 and l2 are scalars. | |
228 | ||
229 | Some invariants: | |
230 | offset = dim_sctr + dim_iter_domain + nb_local_vars | |
231 | offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params. */ | |
232 | ||
233 | /* Construct the T|I|0|0|G|0|0|0|0 part. */ | |
234 | { | |
235 | ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron | |
236 | (&sctr, PBB_TRANSFORMED_SCATTERING (pbb)); | |
237 | ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed | |
238 | (sctr, 2 * nb_subscripts + 2); | |
239 | ppl_insert_dimensions_pointset (sctr, offset, offset); | |
240 | } | |
241 | ||
242 | /* Construct the 0|I|0|0|G|S|0|0|0 part. */ | |
243 | { | |
244 | ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron | |
245 | (&p1, PDR_ACCESSES (pdr)); | |
246 | ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed | |
247 | (p1, nb_subscripts + 2); | |
248 | ppl_insert_dimensions_pointset (p1, 0, dim_sctr); | |
249 | ppl_insert_dimensions_pointset (p1, offset, offset); | |
250 | } | |
251 | ||
252 | /* Construct the 0|0|0|0|0|S|0|l1|0 part. */ | |
253 | { | |
254 | lma = build_linearized_memory_access (offset + dim_sctr, pdr); | |
255 | ppl_set_coef (lma, dim_L1, -1); | |
256 | ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL); | |
257 | ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr); | |
a66257d5 | 258 | ppl_delete_Linear_Expression (lma); |
259 | ppl_delete_Constraint (new_cstr); | |
5e18ab2b | 260 | } |
261 | ||
dd3d3fae | 262 | /* Now intersect all the parts to get the polyhedron P1: |
5e18ab2b | 263 | T|I|0|0|G|0|0|0 |0 |
264 | 0|I|0|0|G|S|0|0 |0 | |
265 | 0|0|0|0|0|S|0|l1|0 | |
266 | ------------------ | |
267 | T|I|0|0|G|S|0|l1|0. */ | |
268 | ||
269 | ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr); | |
270 | ppl_delete_Pointset_Powerset_C_Polyhedron (sctr); | |
271 | ||
272 | /* Build P2, which would have the following form: | |
273 | 0|0|T'|I'|G|0|S'|0|l2 | |
274 | ||
275 | P2 is built, by remapping the P1 polyhedron: | |
276 | T|I|0|0|G|S|0|l1|0 | |
277 | ||
278 | using the following mapping: | |
279 | T->T' | |
280 | I->I' | |
281 | S->S' | |
282 | l1->l2. */ | |
283 | { | |
284 | ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron | |
285 | (&p2, p1); | |
286 | ||
287 | map = ppl_new_id_map (new_dim); | |
288 | ||
dd3d3fae | 289 | /* TI -> T'I'. */ |
5e18ab2b | 290 | for (i = 0; i < offset; i++) |
291 | ppl_interchange (map, i, i + offset); | |
292 | ||
dd3d3fae | 293 | /* l1 -> l2. */ |
5e18ab2b | 294 | ppl_interchange (map, dim_L1, dim_L2); |
295 | ||
dd3d3fae | 296 | /* S -> S'. */ |
5e18ab2b | 297 | for (i = 0; i < nb_subscripts; i++) |
298 | ppl_interchange (map, offset + offsetg + i, | |
299 | offset + offsetg + nb_subscripts + i); | |
300 | ||
301 | ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim); | |
302 | free (map); | |
303 | } | |
304 | ||
a1190c1b | 305 | time_depth = psct_dynamic_dim (pbb, depth); |
80020e9d | 306 | |
307 | /* P1 = P1 inter P2. */ | |
a66257d5 | 308 | ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2); |
3bcc3204 | 309 | build_partial_difference (&p1, time_depth, offset, dim_sctr); |
80020e9d | 310 | |
311 | /* Maximise the expression L2 - L1. */ | |
5e18ab2b | 312 | { |
313 | ppl_new_Linear_Expression_with_dimension (&le, new_dim); | |
314 | ppl_set_coef (le, dim_L2, 1); | |
315 | ppl_set_coef (le, dim_L1, -1); | |
316 | ppl_max_for_le_pointset (p1, le, stride); | |
5e18ab2b | 317 | } |
a66257d5 | 318 | |
3bcc3204 | 319 | if (dump_file && (dump_flags & TDF_DETAILS)) |
320 | { | |
2d6fe479 | 321 | char *str; |
322 | void (*gmp_free) (void *, size_t); | |
323 | ||
3bcc3204 | 324 | fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:", |
325 | pbb_index (pbb), PDR_ID (pdr), (int) depth); | |
2d6fe479 | 326 | str = mpz_get_str (0, 10, stride); |
327 | fprintf (dump_file, " %s ", str); | |
328 | mp_get_memory_functions (NULL, NULL, &gmp_free); | |
329 | (*gmp_free) (str, strlen (str) + 1); | |
3bcc3204 | 330 | } |
331 | ||
a66257d5 | 332 | ppl_delete_Pointset_Powerset_C_Polyhedron (p1); |
333 | ppl_delete_Pointset_Powerset_C_Polyhedron (p2); | |
334 | ppl_delete_Linear_Expression (le); | |
c6bb733d | 335 | } |
336 | ||
b33d4eb4 | 337 | |
a16e8346 | 338 | /* Sets STRIDES to the sum of all the strides of the data references |
339 | accessed in LOOP at DEPTH. */ | |
acba5819 | 340 | |
341 | static void | |
0ef84e3b | 342 | memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides) |
acba5819 | 343 | { |
a16e8346 | 344 | int i, j; |
345 | lst_p l; | |
acba5819 | 346 | poly_dr_p pdr; |
0ef84e3b | 347 | mpz_t s, n; |
acba5819 | 348 | |
2d6fe479 | 349 | mpz_init (s); |
350 | mpz_init (n); | |
acba5819 | 351 | |
a16e8346 | 352 | for (j = 0; VEC_iterate (lst_p, LST_SEQ (loop), j, l); j++) |
353 | if (LST_LOOP_P (l)) | |
b33d4eb4 | 354 | memory_strides_in_loop_1 (l, depth, strides); |
a16e8346 | 355 | else |
356 | for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (LST_PBB (l)), i, pdr); i++) | |
357 | { | |
b33d4eb4 | 358 | pdr_stride_in_loop (s, depth, pdr); |
2d6fe479 | 359 | mpz_set_si (n, PDR_NB_REFS (pdr)); |
360 | mpz_mul (s, s, n); | |
361 | mpz_add (strides, strides, s); | |
a16e8346 | 362 | } |
acba5819 | 363 | |
2d6fe479 | 364 | mpz_clear (s); |
365 | mpz_clear (n); | |
acba5819 | 366 | } |
367 | ||
b33d4eb4 | 368 | /* Sets STRIDES to the sum of all the strides of the data references |
369 | accessed in LOOP at DEPTH. */ | |
370 | ||
371 | static void | |
0ef84e3b | 372 | memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides) |
b33d4eb4 | 373 | { |
2d6fe479 | 374 | if (mpz_cmp_si (loop->memory_strides, -1) == 0) |
b33d4eb4 | 375 | { |
2d6fe479 | 376 | mpz_set_si (strides, 0); |
b33d4eb4 | 377 | memory_strides_in_loop_1 (loop, depth, strides); |
378 | } | |
379 | else | |
2d6fe479 | 380 | mpz_set (strides, loop->memory_strides); |
b33d4eb4 | 381 | } |
382 | ||
a16e8346 | 383 | /* Return true when the interchange of loops LOOP1 and LOOP2 is |
384 | profitable. | |
80020e9d | 385 | |
386 | Example: | |
387 | ||
388 | | int a[100][100]; | |
389 | | | |
390 | | int | |
391 | | foo (int N) | |
392 | | { | |
393 | | int j; | |
394 | | int i; | |
395 | | | |
396 | | for (i = 0; i < N; i++) | |
397 | | for (j = 0; j < N; j++) | |
398 | | a[j][2 * i] += 1; | |
399 | | | |
400 | | return a[N][12]; | |
401 | | } | |
402 | ||
403 | The data access A[j][i] is described like this: | |
404 | ||
405 | | i j N a s0 s1 1 | |
406 | | 0 0 0 1 0 0 -5 = 0 | |
407 | | 0 -1 0 0 1 0 0 = 0 | |
408 | |-2 0 0 0 0 1 0 = 0 | |
409 | | 0 0 0 0 1 0 0 >= 0 | |
410 | | 0 0 0 0 0 1 0 >= 0 | |
411 | | 0 0 0 0 -1 0 100 >= 0 | |
412 | | 0 0 0 0 0 -1 100 >= 0 | |
413 | ||
414 | The linearized memory access L to A[100][100] is: | |
415 | ||
416 | | i j N a s0 s1 1 | |
417 | | 0 0 0 0 100 1 0 | |
418 | ||
5e18ab2b | 419 | TODO: the shown format is not valid as it does not show the fact |
420 | that the iteration domain "i j" is transformed using the scattering. | |
421 | ||
80020e9d | 422 | Next, to measure the impact of iterating once in loop "i", we build |
423 | a maximization problem: first, we add to DR accesses the dimensions | |
3d695986 | 424 | k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1. |
425 | L1 and L2 are the linearized memory access functions. | |
80020e9d | 426 | |
427 | | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
428 | | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
429 | | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j | |
430 | |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i | |
431 | | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0 | |
432 | | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0 | |
433 | | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0 | |
434 | | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0 | |
435 | | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1 | |
436 | ||
437 | Then, we generate the polyhedron P2 by interchanging the dimensions | |
5e18ab2b | 438 | (s0, s2), (s1, s3), (L1, L2), (k, i) |
80020e9d | 439 | |
440 | | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
441 | | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
442 | | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j | |
443 | | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k | |
444 | | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0 | |
445 | | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0 | |
446 | | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0 | |
447 | | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0 | |
448 | | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3 | |
449 | ||
450 | then we add to P2 the equality k = i + 1: | |
451 | ||
452 | |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1 | |
453 | ||
454 | and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)". | |
455 | ||
5e18ab2b | 456 | Similarly, to determine the impact of one iteration on loop "j", we |
80020e9d | 457 | interchange (k, j), we add "k = j + 1", and we compute D2 the |
458 | maximal value of the difference. | |
459 | ||
460 | Finally, the profitability test is D1 < D2: if in the outer loop | |
461 | the strides are smaller than in the inner loop, then it is | |
462 | profitable to interchange the loops at DEPTH1 and DEPTH2. */ | |
c6bb733d | 463 | |
464 | static bool | |
a16e8346 | 465 | lst_interchange_profitable_p (lst_p loop1, lst_p loop2) |
c6bb733d | 466 | { |
0ef84e3b | 467 | mpz_t d1, d2; |
c6bb733d | 468 | bool res; |
469 | ||
a16e8346 | 470 | gcc_assert (loop1 && loop2 |
471 | && LST_LOOP_P (loop1) && LST_LOOP_P (loop2) | |
472 | && lst_depth (loop1) < lst_depth (loop2)); | |
c6bb733d | 473 | |
2d6fe479 | 474 | mpz_init (d1); |
475 | mpz_init (d2); | |
c6bb733d | 476 | |
a16e8346 | 477 | memory_strides_in_loop (loop1, lst_depth (loop1), d1); |
478 | memory_strides_in_loop (loop2, lst_depth (loop2), d2); | |
c6bb733d | 479 | |
6955599b | 480 | res = mpz_cmp (d1, d2) < 0; |
c6bb733d | 481 | |
2d6fe479 | 482 | mpz_clear (d1); |
483 | mpz_clear (d2); | |
c6bb733d | 484 | |
485 | return res; | |
486 | } | |
487 | ||
488 | /* Interchanges the loops at DEPTH1 and DEPTH2 of the original | |
489 | scattering and assigns the resulting polyhedron to the transformed | |
490 | scattering. */ | |
491 | ||
492 | static void | |
5e18ab2b | 493 | pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2, |
494 | poly_bb_p pbb) | |
c6bb733d | 495 | { |
496 | ppl_dimension_type i, dim; | |
497 | ppl_dimension_type *map; | |
498 | ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
5e18ab2b | 499 | ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1); |
500 | ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2); | |
c6bb733d | 501 | |
502 | ppl_Polyhedron_space_dimension (poly, &dim); | |
503 | map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim); | |
504 | ||
505 | for (i = 0; i < dim; i++) | |
506 | map[i] = i; | |
507 | ||
508 | map[dim1] = dim2; | |
509 | map[dim2] = dim1; | |
510 | ||
511 | ppl_Polyhedron_map_space_dimensions (poly, map, dim); | |
512 | free (map); | |
513 | } | |
514 | ||
4d0382bc | 515 | /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all |
516 | the statements below LST. */ | |
517 | ||
518 | static void | |
519 | lst_apply_interchange (lst_p lst, int depth1, int depth2) | |
520 | { | |
521 | if (!lst) | |
522 | return; | |
523 | ||
524 | if (LST_LOOP_P (lst)) | |
525 | { | |
526 | int i; | |
527 | lst_p l; | |
528 | ||
529 | for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++) | |
530 | lst_apply_interchange (l, depth1, depth2); | |
531 | } | |
532 | else | |
533 | pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst)); | |
534 | } | |
535 | ||
06ced013 | 536 | /* Return true when the nest starting at LOOP1 and ending on LOOP2 is |
537 | perfect: i.e. there are no sequence of statements. */ | |
538 | ||
539 | static bool | |
540 | lst_perfectly_nested_p (lst_p loop1, lst_p loop2) | |
541 | { | |
542 | if (loop1 == loop2) | |
543 | return true; | |
544 | ||
545 | if (!LST_LOOP_P (loop1)) | |
546 | return false; | |
547 | ||
548 | return VEC_length (lst_p, LST_SEQ (loop1)) == 1 | |
549 | && lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2); | |
550 | } | |
551 | ||
552 | /* Transform the loop nest between LOOP1 and LOOP2 into a perfect | |
553 | nest. To continue the naming tradition, this function is called | |
87d25ca7 | 554 | after perfect_nestify. NEST is set to the perfectly nested loop |
555 | that is created. BEFORE/AFTER are set to the loops distributed | |
556 | before/after the loop NEST. */ | |
06ced013 | 557 | |
558 | static void | |
87d25ca7 | 559 | lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before, |
560 | lst_p *nest, lst_p *after) | |
06ced013 | 561 | { |
06ced013 | 562 | poly_bb_p first, last; |
563 | ||
564 | gcc_assert (loop1 && loop2 | |
565 | && loop1 != loop2 | |
566 | && LST_LOOP_P (loop1) && LST_LOOP_P (loop2)); | |
567 | ||
568 | first = LST_PBB (lst_find_first_pbb (loop2)); | |
569 | last = LST_PBB (lst_find_last_pbb (loop2)); | |
570 | ||
87d25ca7 | 571 | *before = copy_lst (loop1); |
572 | *nest = copy_lst (loop1); | |
573 | *after = copy_lst (loop1); | |
06ced013 | 574 | |
87d25ca7 | 575 | lst_remove_all_before_including_pbb (*before, first, false); |
576 | lst_remove_all_before_including_pbb (*after, last, true); | |
06ced013 | 577 | |
87d25ca7 | 578 | lst_remove_all_before_excluding_pbb (*nest, first, true); |
579 | lst_remove_all_before_excluding_pbb (*nest, last, false); | |
673c512e | 580 | |
581 | if (lst_empty_p (*before)) | |
cfd3faac | 582 | { |
583 | free_lst (*before); | |
584 | *before = NULL; | |
585 | } | |
673c512e | 586 | if (lst_empty_p (*after)) |
cfd3faac | 587 | { |
588 | free_lst (*after); | |
589 | *after = NULL; | |
590 | } | |
673c512e | 591 | if (lst_empty_p (*nest)) |
cfd3faac | 592 | { |
593 | free_lst (*nest); | |
594 | *nest = NULL; | |
595 | } | |
06ced013 | 596 | } |
4d0382bc | 597 | |
598 | /* Try to interchange LOOP1 with LOOP2 for all the statements of the | |
599 | body of LOOP2. LOOP1 contains LOOP2. Return true if it did the | |
a1a6700b | 600 | interchange. */ |
4d0382bc | 601 | |
602 | static bool | |
a1a6700b | 603 | lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2) |
4d0382bc | 604 | { |
605 | int depth1 = lst_depth (loop1); | |
606 | int depth2 = lst_depth (loop2); | |
87d25ca7 | 607 | lst_p transformed; |
608 | ||
a1a6700b | 609 | lst_p before = NULL, nest = NULL, after = NULL; |
4d0382bc | 610 | |
a16e8346 | 611 | if (!lst_interchange_profitable_p (loop1, loop2)) |
4d0382bc | 612 | return false; |
613 | ||
06ced013 | 614 | if (!lst_perfectly_nested_p (loop1, loop2)) |
a1a6700b | 615 | lst_perfect_nestify (loop1, loop2, &before, &nest, &after); |
06ced013 | 616 | |
4d0382bc | 617 | lst_apply_interchange (loop2, depth1, depth2); |
618 | ||
87d25ca7 | 619 | /* Sync the transformed LST information and the PBB scatterings |
620 | before using the scatterings in the data dependence analysis. */ | |
a1a6700b | 621 | if (before || nest || after) |
87d25ca7 | 622 | { |
623 | transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1, | |
a1a6700b | 624 | before, nest, after); |
87d25ca7 | 625 | lst_update_scattering (transformed); |
626 | free_lst (transformed); | |
627 | } | |
628 | ||
4d0382bc | 629 | if (graphite_legal_transform (scop)) |
630 | { | |
631 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
632 | fprintf (dump_file, | |
633 | "Loops at depths %d and %d will be interchanged.\n", | |
634 | depth1, depth2); | |
635 | ||
87d25ca7 | 636 | /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */ |
a1a6700b | 637 | lst_insert_in_sequence (before, loop1, true); |
638 | lst_insert_in_sequence (after, loop1, false); | |
87d25ca7 | 639 | |
a1a6700b | 640 | if (nest) |
87d25ca7 | 641 | { |
a1a6700b | 642 | lst_replace (loop1, nest); |
87d25ca7 | 643 | free_lst (loop1); |
644 | } | |
645 | ||
4d0382bc | 646 | return true; |
647 | } | |
648 | ||
649 | /* Undo the transform. */ | |
cfd3faac | 650 | free_lst (before); |
651 | free_lst (nest); | |
652 | free_lst (after); | |
4d0382bc | 653 | lst_apply_interchange (loop2, depth2, depth1); |
654 | return false; | |
655 | } | |
656 | ||
673c512e | 657 | /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged |
658 | with the loop OUTER in LST_SEQ (OUTER_FATHER). */ | |
4d0382bc | 659 | |
660 | static bool | |
673c512e | 661 | lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer, |
662 | lst_p inner_father) | |
4d0382bc | 663 | { |
673c512e | 664 | int inner; |
a1a6700b | 665 | lst_p loop1, loop2; |
87d25ca7 | 666 | |
673c512e | 667 | gcc_assert (outer_father |
668 | && LST_LOOP_P (outer_father) | |
669 | && LST_LOOP_P (VEC_index (lst_p, LST_SEQ (outer_father), outer)) | |
670 | && inner_father | |
671 | && LST_LOOP_P (inner_father)); | |
4d0382bc | 672 | |
a1a6700b | 673 | loop1 = VEC_index (lst_p, LST_SEQ (outer_father), outer); |
87d25ca7 | 674 | |
a1a6700b | 675 | for (inner = 0; VEC_iterate (lst_p, LST_SEQ (inner_father), inner, loop2); inner++) |
676 | if (LST_LOOP_P (loop2) | |
677 | && (lst_try_interchange_loops (scop, loop1, loop2) | |
678 | || lst_interchange_select_inner (scop, outer_father, outer, loop2))) | |
679 | return true; | |
680 | ||
681 | return false; | |
87d25ca7 | 682 | } |
4d0382bc | 683 | |
87d25ca7 | 684 | /* Interchanges all the loops of LOOP and the loops of its body that |
685 | are considered profitable to interchange. Return true if it did | |
673c512e | 686 | interchanged some loops. OUTER is the index in LST_SEQ (LOOP) that |
687 | points to the next outer loop to be considered for interchange. */ | |
4d0382bc | 688 | |
87d25ca7 | 689 | static bool |
673c512e | 690 | lst_interchange_select_outer (scop_p scop, lst_p loop, int outer) |
87d25ca7 | 691 | { |
692 | lst_p l; | |
693 | bool res = false; | |
673c512e | 694 | int i = 0; |
695 | lst_p father; | |
4d0382bc | 696 | |
87d25ca7 | 697 | if (!loop || !LST_LOOP_P (loop)) |
698 | return false; | |
4d0382bc | 699 | |
673c512e | 700 | father = LST_LOOP_FATHER (loop); |
701 | if (father) | |
702 | { | |
a1a6700b | 703 | while (lst_interchange_select_inner (scop, father, outer, loop)) |
704 | { | |
705 | res = true; | |
706 | loop = VEC_index (lst_p, LST_SEQ (father), outer); | |
707 | } | |
673c512e | 708 | } |
709 | ||
710 | if (LST_LOOP_P (loop)) | |
711 | for (i = 0; VEC_iterate (lst_p, LST_SEQ (loop), i, l); i++) | |
712 | if (LST_LOOP_P (l)) | |
713 | res |= lst_interchange_select_outer (scop, l, i); | |
87d25ca7 | 714 | |
87d25ca7 | 715 | return res; |
c6bb733d | 716 | } |
717 | ||
718 | /* Interchanges all the loop depths that are considered profitable for SCOP. */ | |
719 | ||
720 | bool | |
721 | scop_do_interchange (scop_p scop) | |
722 | { | |
673c512e | 723 | bool res = lst_interchange_select_outer |
724 | (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0); | |
87d25ca7 | 725 | |
726 | lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop)); | |
a741358d | 727 | |
06ced013 | 728 | return res; |
c6bb733d | 729 | } |
730 | ||
4d0382bc | 731 | |
c6bb733d | 732 | #endif |
733 |