]>
Commit | Line | Data |
---|---|---|
b60cc080 | 1 | /* A scheduling optimizer for Graphite |
d1e082c2 | 2 | Copyright (C) 2012-2013 Free Software Foundation, Inc. |
b60cc080 TG |
3 | Contributed by Tobias Grosser <tobias@grosser.es>. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | ||
23 | #ifdef HAVE_cloog | |
24 | #include <isl/set.h> | |
25 | #include <isl/map.h> | |
26 | #include <isl/union_map.h> | |
27 | #include <isl/schedule.h> | |
28 | #include <isl/band.h> | |
29 | #include <isl/aff.h> | |
30 | #include <isl/options.h> | |
02663f24 | 31 | #endif |
b60cc080 TG |
32 | |
33 | #include "system.h" | |
34 | #include "coretypes.h" | |
4d648807 | 35 | #include "tree.h" |
442b4905 | 36 | #include "gimple.h" |
5be5c238 | 37 | #include "gimple-iterator.h" |
442b4905 | 38 | #include "tree-ssa-loop.h" |
7ee2468b | 39 | #include "dumpfile.h" |
b60cc080 TG |
40 | #include "cfgloop.h" |
41 | #include "tree-chrec.h" | |
42 | #include "tree-data-ref.h" | |
43 | #include "tree-scalar-evolution.h" | |
44 | #include "sese.h" | |
45 | ||
02663f24 | 46 | #ifdef HAVE_cloog |
b60cc080 TG |
47 | #include "graphite-poly.h" |
48 | ||
49 | static isl_union_set * | |
50 | scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) | |
51 | { | |
52 | int i; | |
53 | poly_bb_p pbb; | |
54 | isl_space *space = isl_set_get_space (scop->context); | |
55 | isl_union_set *res = isl_union_set_empty (space); | |
56 | ||
9771b263 | 57 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
b60cc080 TG |
58 | res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); |
59 | ||
60 | return res; | |
61 | } | |
62 | ||
63 | static isl_union_map * | |
64 | scop_get_dependences (scop_p scop) | |
65 | { | |
66 | isl_union_map *dependences; | |
67 | ||
68 | if (!scop->must_raw) | |
69 | compute_deps (scop, SCOP_BBS (scop), | |
70 | &scop->must_raw, &scop->may_raw, | |
71 | &scop->must_raw_no_source, &scop->may_raw_no_source, | |
72 | &scop->must_war, &scop->may_war, | |
73 | &scop->must_war_no_source, &scop->may_war_no_source, | |
74 | &scop->must_waw, &scop->may_waw, | |
75 | &scop->must_waw_no_source, &scop->may_waw_no_source); | |
76 | ||
77 | dependences = isl_union_map_copy (scop->must_raw); | |
78 | dependences = isl_union_map_union (dependences, | |
79 | isl_union_map_copy (scop->must_war)); | |
80 | dependences = isl_union_map_union (dependences, | |
81 | isl_union_map_copy (scop->must_waw)); | |
82 | dependences = isl_union_map_union (dependences, | |
83 | isl_union_map_copy (scop->may_raw)); | |
84 | dependences = isl_union_map_union (dependences, | |
85 | isl_union_map_copy (scop->may_war)); | |
86 | dependences = isl_union_map_union (dependences, | |
87 | isl_union_map_copy (scop->may_waw)); | |
88 | ||
89 | return dependences; | |
90 | } | |
91 | ||
92 | /* getTileMap - Create a map that describes a n-dimensonal tiling. | |
93 | ||
94 | getTileMap creates a map from a n-dimensional scattering space into an | |
95 | 2*n-dimensional scattering space. The map describes a rectangular tiling. | |
96 | ||
97 | Example: | |
98 | scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 | |
99 | ||
100 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: | |
101 | t0 % 32 = 0 and t0 <= s0 < t0 + 32 and | |
102 | t1 % 32 = 0 and t1 <= s1 < t1 + 32} | |
103 | ||
104 | Before tiling: | |
105 | ||
106 | for (i = 0; i < N; i++) | |
107 | for (j = 0; j < M; j++) | |
108 | S(i,j) | |
109 | ||
110 | After tiling: | |
111 | ||
112 | for (t_i = 0; t_i < N; i+=32) | |
113 | for (t_j = 0; t_j < M; j+=32) | |
114 | for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 | |
115 | for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0 | |
116 | S(i,j) | |
117 | */ | |
118 | ||
119 | static isl_basic_map * | |
c3284718 | 120 | getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize) |
b60cc080 TG |
121 | { |
122 | int x; | |
123 | /* We construct | |
124 | ||
125 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: | |
126 | s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and | |
127 | s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} | |
128 | ||
129 | and project out the auxilary dimensions a0 and a1. */ | |
c3284718 RS |
130 | isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions, |
131 | scheduleDimensions * 3); | |
132 | isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space)); | |
b60cc080 | 133 | |
c3284718 | 134 | isl_local_space *LocalSpace = isl_local_space_from_space (Space); |
b60cc080 TG |
135 | |
136 | for (x = 0; x < scheduleDimensions; x++) | |
137 | { | |
138 | int sX = x; | |
139 | int tX = x; | |
140 | int pX = scheduleDimensions + x; | |
141 | int aX = 2 * scheduleDimensions + x; | |
142 | ||
143 | isl_constraint *c; | |
144 | ||
145 | /* sX = aX * tileSize; */ | |
c3284718 RS |
146 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
147 | isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1); | |
148 | isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize); | |
149 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
150 | |
151 | /* pX = sX; */ | |
c3284718 RS |
152 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
153 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); | |
154 | isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1); | |
155 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
156 | |
157 | /* tX <= pX */ | |
c3284718 RS |
158 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
159 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); | |
160 | isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1); | |
161 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
162 | |
163 | /* pX <= tX + (tileSize - 1) */ | |
c3284718 RS |
164 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
165 | isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1); | |
166 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1); | |
167 | isl_constraint_set_constant_si (c, tileSize - 1); | |
168 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
169 | } |
170 | ||
688010ba | 171 | /* Project out auxiliary dimensions. |
b60cc080 | 172 | |
688010ba | 173 | The auxiliary dimensions are transformed into existentially quantified ones. |
b60cc080 TG |
174 | This reduces the number of visible scattering dimensions and allows Cloog |
175 | to produces better code. */ | |
c3284718 RS |
176 | tileMap = isl_basic_map_project_out (tileMap, isl_dim_out, |
177 | 2 * scheduleDimensions, | |
178 | scheduleDimensions); | |
179 | isl_local_space_free (LocalSpace); | |
b60cc080 TG |
180 | return tileMap; |
181 | } | |
182 | ||
183 | /* getScheduleForBand - Get the schedule for this band. | |
184 | ||
185 | Polly applies transformations like tiling on top of the isl calculated value. | |
186 | This can influence the number of scheduling dimension. The number of | |
187 | schedule dimensions is returned in the parameter 'Dimension'. */ | |
188 | static bool DisableTiling = false; | |
189 | ||
190 | static isl_union_map * | |
c3284718 | 191 | getScheduleForBand (isl_band *Band, int *Dimensions) |
b60cc080 TG |
192 | { |
193 | isl_union_map *PartialSchedule; | |
194 | isl_ctx *ctx; | |
195 | isl_space *Space; | |
196 | isl_basic_map *TileMap; | |
197 | isl_union_map *TileUMap; | |
198 | ||
c3284718 RS |
199 | PartialSchedule = isl_band_get_partial_schedule (Band); |
200 | *Dimensions = isl_band_n_member (Band); | |
b60cc080 TG |
201 | |
202 | if (DisableTiling) | |
203 | return PartialSchedule; | |
204 | ||
205 | /* It does not make any sense to tile a band with just one dimension. */ | |
206 | if (*Dimensions == 1) | |
207 | return PartialSchedule; | |
208 | ||
c3284718 RS |
209 | ctx = isl_union_map_get_ctx (PartialSchedule); |
210 | Space = isl_union_map_get_space (PartialSchedule); | |
b60cc080 | 211 | |
c3284718 RS |
212 | TileMap = getTileMap (ctx, *Dimensions, 32); |
213 | TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap)); | |
214 | TileUMap = isl_union_map_align_params (TileUMap, Space); | |
b60cc080 TG |
215 | *Dimensions = 2 * *Dimensions; |
216 | ||
c3284718 | 217 | return isl_union_map_apply_range (PartialSchedule, TileUMap); |
b60cc080 TG |
218 | } |
219 | ||
220 | /* Create a map that pre-vectorizes one scheduling dimension. | |
221 | ||
222 | getPrevectorMap creates a map that maps each input dimension to the same | |
223 | output dimension, except for the dimension DimToVectorize. DimToVectorize is | |
224 | strip mined by 'VectorWidth' and the newly created point loop of | |
225 | DimToVectorize is moved to the innermost level. | |
226 | ||
227 | Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4): | |
228 | ||
229 | | Before transformation | |
230 | | | |
231 | | A[i,j] -> [i,j] | |
232 | | | |
233 | | for (i = 0; i < 128; i++) | |
234 | | for (j = 0; j < 128; j++) | |
235 | | A(i,j); | |
236 | ||
237 | Prevector map: | |
238 | [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
239 | ||
240 | | After transformation: | |
241 | | | |
242 | | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
243 | | | |
244 | | for (it = 0; it < 128; it+=4) | |
245 | | for (j = 0; j < 128; j++) | |
246 | | for (ip = max(0,it); ip < min(128, it + 3); ip++) | |
247 | | A(ip,j); | |
248 | ||
249 | The goal of this transformation is to create a trivially vectorizable loop. | |
250 | This means a parallel loop at the innermost level that has a constant number | |
251 | of iterations corresponding to the target vector width. | |
252 | ||
253 | This transformation creates a loop at the innermost level. The loop has a | |
254 | constant number of iterations, if the number of loop iterations at | |
255 | DimToVectorize can be devided by VectorWidth. The default VectorWidth is | |
256 | currently constant and not yet target specific. This function does not reason | |
257 | about parallelism. */ | |
258 | static isl_map * | |
c3284718 RS |
259 | getPrevectorMap (isl_ctx *ctx, int DimToVectorize, |
260 | int ScheduleDimensions, | |
261 | int VectorWidth) | |
b60cc080 TG |
262 | { |
263 | isl_space *Space; | |
264 | isl_local_space *LocalSpace, *LocalSpaceRange; | |
265 | isl_set *Modulo; | |
266 | isl_map *TilingMap; | |
267 | isl_constraint *c; | |
268 | isl_aff *Aff; | |
269 | int PointDimension; /* ip */ | |
270 | int TileDimension; /* it */ | |
271 | isl_int VectorWidthMP; | |
272 | int i; | |
273 | ||
274 | /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/ | |
275 | ||
c3284718 RS |
276 | Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1); |
277 | TilingMap = isl_map_universe (isl_space_copy (Space)); | |
278 | LocalSpace = isl_local_space_from_space (Space); | |
b60cc080 TG |
279 | PointDimension = ScheduleDimensions; |
280 | TileDimension = DimToVectorize; | |
281 | ||
282 | /* Create an identity map for everything except DimToVectorize and map | |
283 | DimToVectorize to the point loop at the innermost dimension. */ | |
284 | for (i = 0; i < ScheduleDimensions; i++) | |
285 | { | |
c3284718 RS |
286 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
287 | isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1); | |
b60cc080 TG |
288 | |
289 | if (i == DimToVectorize) | |
c3284718 | 290 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1); |
b60cc080 | 291 | else |
c3284718 | 292 | isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1); |
b60cc080 | 293 | |
c3284718 | 294 | TilingMap = isl_map_add_constraint (TilingMap, c); |
b60cc080 TG |
295 | } |
296 | ||
297 | /* it % 'VectorWidth' = 0 */ | |
c3284718 RS |
298 | LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace)); |
299 | Aff = isl_aff_zero_on_domain (LocalSpaceRange); | |
300 | Aff = isl_aff_set_constant_si (Aff, VectorWidth); | |
301 | Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1); | |
302 | isl_int_init (VectorWidthMP); | |
303 | isl_int_set_si (VectorWidthMP, VectorWidth); | |
304 | Aff = isl_aff_mod (Aff, VectorWidthMP); | |
305 | isl_int_clear (VectorWidthMP); | |
306 | Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff)); | |
307 | TilingMap = isl_map_intersect_range (TilingMap, Modulo); | |
b60cc080 TG |
308 | |
309 | /* it <= ip */ | |
c3284718 RS |
310 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
311 | isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1); | |
312 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1); | |
313 | TilingMap = isl_map_add_constraint (TilingMap, c); | |
b60cc080 TG |
314 | |
315 | /* ip <= it + ('VectorWidth' - 1) */ | |
c3284718 RS |
316 | c = isl_inequality_alloc (LocalSpace); |
317 | isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1); | |
318 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1); | |
319 | isl_constraint_set_constant_si (c, VectorWidth - 1); | |
320 | TilingMap = isl_map_add_constraint (TilingMap, c); | |
b60cc080 | 321 | |
c3284718 | 322 | isl_map_dump (TilingMap); |
b60cc080 TG |
323 | |
324 | return TilingMap; | |
325 | } | |
326 | ||
327 | static bool EnablePollyVector = false; | |
328 | ||
329 | /* getScheduleForBandList - Get the scheduling map for a list of bands. | |
330 | ||
331 | We walk recursively the forest of bands to combine the schedules of the | |
332 | individual bands to the overall schedule. In case tiling is requested, | |
333 | the individual bands are tiled. */ | |
334 | static isl_union_map * | |
c3284718 | 335 | getScheduleForBandList (isl_band_list *BandList) |
b60cc080 TG |
336 | { |
337 | int NumBands, i; | |
338 | isl_union_map *Schedule; | |
339 | isl_ctx *ctx; | |
340 | ||
c3284718 RS |
341 | ctx = isl_band_list_get_ctx (BandList); |
342 | NumBands = isl_band_list_n_band (BandList); | |
343 | Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0)); | |
b60cc080 TG |
344 | |
345 | for (i = 0; i < NumBands; i++) | |
346 | { | |
347 | isl_band *Band; | |
348 | isl_union_map *PartialSchedule; | |
349 | int ScheduleDimensions; | |
350 | isl_space *Space; | |
351 | ||
c3284718 RS |
352 | Band = isl_band_list_get_band (BandList, i); |
353 | PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions); | |
354 | Space = isl_union_map_get_space (PartialSchedule); | |
b60cc080 | 355 | |
c3284718 | 356 | if (isl_band_has_children (Band)) |
b60cc080 TG |
357 | { |
358 | isl_band_list *Children; | |
359 | isl_union_map *SuffixSchedule; | |
360 | ||
c3284718 RS |
361 | Children = isl_band_get_children (Band); |
362 | SuffixSchedule = getScheduleForBandList (Children); | |
363 | PartialSchedule = isl_union_map_flat_range_product (PartialSchedule, | |
364 | SuffixSchedule); | |
365 | isl_band_list_free (Children); | |
b60cc080 TG |
366 | } |
367 | else if (EnablePollyVector) | |
368 | { | |
369 | for (i = ScheduleDimensions - 1 ; i >= 0 ; i--) | |
370 | { | |
c3284718 | 371 | if (isl_band_member_is_zero_distance (Band, i)) |
b60cc080 TG |
372 | { |
373 | isl_map *TileMap; | |
374 | isl_union_map *TileUMap; | |
375 | ||
c3284718 RS |
376 | TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4); |
377 | TileUMap = isl_union_map_from_map (TileMap); | |
378 | TileUMap = isl_union_map_align_params | |
379 | (TileUMap, isl_space_copy (Space)); | |
380 | PartialSchedule = isl_union_map_apply_range | |
381 | (PartialSchedule, TileUMap); | |
b60cc080 TG |
382 | break; |
383 | } | |
384 | } | |
385 | } | |
386 | ||
c3284718 | 387 | Schedule = isl_union_map_union (Schedule, PartialSchedule); |
b60cc080 | 388 | |
c3284718 RS |
389 | isl_band_free (Band); |
390 | isl_space_free (Space); | |
b60cc080 TG |
391 | } |
392 | ||
393 | return Schedule; | |
394 | } | |
395 | ||
396 | static isl_union_map * | |
c3284718 | 397 | getScheduleMap (isl_schedule *Schedule) |
b60cc080 | 398 | { |
c3284718 RS |
399 | isl_band_list *BandList = isl_schedule_get_band_forest (Schedule); |
400 | isl_union_map *ScheduleMap = getScheduleForBandList (BandList); | |
401 | isl_band_list_free (BandList); | |
b60cc080 TG |
402 | return ScheduleMap; |
403 | } | |
404 | ||
405 | static int | |
c3284718 | 406 | getSingleMap (__isl_take isl_map *map, void *user) |
b60cc080 TG |
407 | { |
408 | isl_map **singleMap = (isl_map **) user; | |
409 | *singleMap = map; | |
410 | ||
411 | return 0; | |
412 | } | |
413 | ||
414 | static void | |
415 | apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map) | |
416 | { | |
417 | int i; | |
418 | poly_bb_p pbb; | |
419 | ||
9771b263 | 420 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
b60cc080 TG |
421 | { |
422 | isl_set *domain = isl_set_copy (pbb->domain); | |
423 | isl_union_map *stmtBand; | |
424 | isl_map *stmtSchedule; | |
425 | ||
c3284718 RS |
426 | stmtBand = isl_union_map_intersect_domain |
427 | (isl_union_map_copy (schedule_map), | |
428 | isl_union_set_from_set (domain)); | |
429 | isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule); | |
430 | isl_map_free (pbb->transformed); | |
b60cc080 | 431 | pbb->transformed = stmtSchedule; |
c3284718 | 432 | isl_union_map_free (stmtBand); |
b60cc080 TG |
433 | } |
434 | } | |
435 | ||
436 | static const int CONSTANT_BOUND = 20; | |
437 | ||
438 | bool | |
439 | optimize_isl (scop_p scop) | |
440 | { | |
441 | ||
442 | isl_schedule *schedule; | |
443 | isl_union_set *domain; | |
444 | isl_union_map *validity, *proximity, *dependences; | |
445 | isl_union_map *schedule_map; | |
446 | ||
447 | domain = scop_get_domains (scop); | |
448 | dependences = scop_get_dependences (scop); | |
c3284718 RS |
449 | dependences = isl_union_map_gist_domain (dependences, |
450 | isl_union_set_copy (domain)); | |
451 | dependences = isl_union_map_gist_range (dependences, | |
452 | isl_union_set_copy (domain)); | |
b60cc080 TG |
453 | validity = dependences; |
454 | ||
455 | proximity = isl_union_map_copy (validity); | |
456 | ||
c3284718 RS |
457 | isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND); |
458 | isl_options_set_schedule_maximize_band_depth (scop->ctx, 1); | |
459 | isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN); | |
460 | isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE); | |
b60cc080 | 461 | schedule = isl_union_set_compute_schedule (domain, validity, proximity); |
c3284718 | 462 | isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT); |
b60cc080 TG |
463 | |
464 | if (!schedule) | |
465 | return false; | |
466 | ||
467 | schedule_map = getScheduleMap (schedule); | |
468 | ||
469 | apply_schedule_map_to_scop (scop, schedule_map); | |
470 | ||
471 | isl_schedule_free (schedule); | |
472 | isl_union_map_free (schedule_map); | |
473 | ||
474 | return true; | |
475 | } | |
476 | ||
477 | #endif |