]>
Commit | Line | Data |
---|---|---|
89049f25 | 1 | /* A scheduling optimizer for Graphite |
2 | Copyright (C) 2012 Free Software Foundation, Inc. | |
3 | Contributed by Tobias Grosser <tobias@grosser.es>. | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | ||
23 | #ifdef HAVE_cloog | |
24 | #include <isl/set.h> | |
25 | #include <isl/map.h> | |
26 | #include <isl/union_map.h> | |
27 | #include <isl/schedule.h> | |
28 | #include <isl/band.h> | |
29 | #include <isl/aff.h> | |
30 | #include <isl/options.h> | |
3a889482 | 31 | #endif |
89049f25 | 32 | |
33 | #include "system.h" | |
34 | #include "coretypes.h" | |
35 | #include "tree-flow.h" | |
b9ed1410 | 36 | #include "dumpfile.h" |
89049f25 | 37 | #include "cfgloop.h" |
38 | #include "tree-chrec.h" | |
39 | #include "tree-data-ref.h" | |
40 | #include "tree-scalar-evolution.h" | |
41 | #include "sese.h" | |
42 | ||
3a889482 | 43 | #ifdef HAVE_cloog |
89049f25 | 44 | #include "graphite-poly.h" |
45 | ||
46 | static isl_union_set * | |
47 | scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) | |
48 | { | |
49 | int i; | |
50 | poly_bb_p pbb; | |
51 | isl_space *space = isl_set_get_space (scop->context); | |
52 | isl_union_set *res = isl_union_set_empty (space); | |
53 | ||
f1f41a6c | 54 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
89049f25 | 55 | res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); |
56 | ||
57 | return res; | |
58 | } | |
59 | ||
60 | static isl_union_map * | |
61 | scop_get_dependences (scop_p scop) | |
62 | { | |
63 | isl_union_map *dependences; | |
64 | ||
65 | if (!scop->must_raw) | |
66 | compute_deps (scop, SCOP_BBS (scop), | |
67 | &scop->must_raw, &scop->may_raw, | |
68 | &scop->must_raw_no_source, &scop->may_raw_no_source, | |
69 | &scop->must_war, &scop->may_war, | |
70 | &scop->must_war_no_source, &scop->may_war_no_source, | |
71 | &scop->must_waw, &scop->may_waw, | |
72 | &scop->must_waw_no_source, &scop->may_waw_no_source); | |
73 | ||
74 | dependences = isl_union_map_copy (scop->must_raw); | |
75 | dependences = isl_union_map_union (dependences, | |
76 | isl_union_map_copy (scop->must_war)); | |
77 | dependences = isl_union_map_union (dependences, | |
78 | isl_union_map_copy (scop->must_waw)); | |
79 | dependences = isl_union_map_union (dependences, | |
80 | isl_union_map_copy (scop->may_raw)); | |
81 | dependences = isl_union_map_union (dependences, | |
82 | isl_union_map_copy (scop->may_war)); | |
83 | dependences = isl_union_map_union (dependences, | |
84 | isl_union_map_copy (scop->may_waw)); | |
85 | ||
86 | return dependences; | |
87 | } | |
88 | ||
89 | /* getTileMap - Create a map that describes a n-dimensonal tiling. | |
90 | ||
91 | getTileMap creates a map from a n-dimensional scattering space into an | |
92 | 2*n-dimensional scattering space. The map describes a rectangular tiling. | |
93 | ||
94 | Example: | |
95 | scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 | |
96 | ||
97 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: | |
98 | t0 % 32 = 0 and t0 <= s0 < t0 + 32 and | |
99 | t1 % 32 = 0 and t1 <= s1 < t1 + 32} | |
100 | ||
101 | Before tiling: | |
102 | ||
103 | for (i = 0; i < N; i++) | |
104 | for (j = 0; j < M; j++) | |
105 | S(i,j) | |
106 | ||
107 | After tiling: | |
108 | ||
109 | for (t_i = 0; t_i < N; i+=32) | |
110 | for (t_j = 0; t_j < M; j+=32) | |
111 | for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 | |
112 | for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0 | |
113 | S(i,j) | |
114 | */ | |
115 | ||
116 | static isl_basic_map * | |
117 | getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize) | |
118 | { | |
119 | int x; | |
120 | /* We construct | |
121 | ||
122 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: | |
123 | s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and | |
124 | s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} | |
125 | ||
126 | and project out the auxilary dimensions a0 and a1. */ | |
127 | isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions, | |
128 | scheduleDimensions * 3); | |
129 | isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space)); | |
130 | ||
131 | isl_local_space *LocalSpace = isl_local_space_from_space(Space); | |
132 | ||
133 | for (x = 0; x < scheduleDimensions; x++) | |
134 | { | |
135 | int sX = x; | |
136 | int tX = x; | |
137 | int pX = scheduleDimensions + x; | |
138 | int aX = 2 * scheduleDimensions + x; | |
139 | ||
140 | isl_constraint *c; | |
141 | ||
142 | /* sX = aX * tileSize; */ | |
143 | c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); | |
144 | isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1); | |
145 | isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize); | |
146 | tileMap = isl_basic_map_add_constraint(tileMap, c); | |
147 | ||
148 | /* pX = sX; */ | |
149 | c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); | |
150 | isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); | |
151 | isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1); | |
152 | tileMap = isl_basic_map_add_constraint(tileMap, c); | |
153 | ||
154 | /* tX <= pX */ | |
155 | c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); | |
156 | isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); | |
157 | isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1); | |
158 | tileMap = isl_basic_map_add_constraint(tileMap, c); | |
159 | ||
160 | /* pX <= tX + (tileSize - 1) */ | |
161 | c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); | |
162 | isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1); | |
163 | isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1); | |
164 | isl_constraint_set_constant_si(c, tileSize - 1); | |
165 | tileMap = isl_basic_map_add_constraint(tileMap, c); | |
166 | } | |
167 | ||
168 | /* Project out auxilary dimensions. | |
169 | ||
170 | The auxilary dimensions are transformed into existentially quantified ones. | |
171 | This reduces the number of visible scattering dimensions and allows Cloog | |
172 | to produces better code. */ | |
173 | tileMap = isl_basic_map_project_out(tileMap, isl_dim_out, | |
174 | 2 * scheduleDimensions, | |
175 | scheduleDimensions); | |
176 | isl_local_space_free(LocalSpace); | |
177 | return tileMap; | |
178 | } | |
179 | ||
180 | /* getScheduleForBand - Get the schedule for this band. | |
181 | ||
182 | Polly applies transformations like tiling on top of the isl calculated value. | |
183 | This can influence the number of scheduling dimension. The number of | |
184 | schedule dimensions is returned in the parameter 'Dimension'. */ | |
185 | static bool DisableTiling = false; | |
186 | ||
187 | static isl_union_map * | |
188 | getScheduleForBand(isl_band *Band, int *Dimensions) | |
189 | { | |
190 | isl_union_map *PartialSchedule; | |
191 | isl_ctx *ctx; | |
192 | isl_space *Space; | |
193 | isl_basic_map *TileMap; | |
194 | isl_union_map *TileUMap; | |
195 | ||
196 | PartialSchedule = isl_band_get_partial_schedule(Band); | |
197 | *Dimensions = isl_band_n_member(Band); | |
198 | ||
199 | if (DisableTiling) | |
200 | return PartialSchedule; | |
201 | ||
202 | /* It does not make any sense to tile a band with just one dimension. */ | |
203 | if (*Dimensions == 1) | |
204 | return PartialSchedule; | |
205 | ||
206 | ctx = isl_union_map_get_ctx(PartialSchedule); | |
207 | Space = isl_union_map_get_space(PartialSchedule); | |
208 | ||
209 | TileMap = getTileMap(ctx, *Dimensions, 32); | |
210 | TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap)); | |
211 | TileUMap = isl_union_map_align_params(TileUMap, Space); | |
212 | *Dimensions = 2 * *Dimensions; | |
213 | ||
214 | return isl_union_map_apply_range(PartialSchedule, TileUMap); | |
215 | } | |
216 | ||
217 | /* Create a map that pre-vectorizes one scheduling dimension. | |
218 | ||
219 | getPrevectorMap creates a map that maps each input dimension to the same | |
220 | output dimension, except for the dimension DimToVectorize. DimToVectorize is | |
221 | strip mined by 'VectorWidth' and the newly created point loop of | |
222 | DimToVectorize is moved to the innermost level. | |
223 | ||
224 | Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4): | |
225 | ||
226 | | Before transformation | |
227 | | | |
228 | | A[i,j] -> [i,j] | |
229 | | | |
230 | | for (i = 0; i < 128; i++) | |
231 | | for (j = 0; j < 128; j++) | |
232 | | A(i,j); | |
233 | ||
234 | Prevector map: | |
235 | [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
236 | ||
237 | | After transformation: | |
238 | | | |
239 | | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
240 | | | |
241 | | for (it = 0; it < 128; it+=4) | |
242 | | for (j = 0; j < 128; j++) | |
243 | | for (ip = max(0,it); ip < min(128, it + 3); ip++) | |
244 | | A(ip,j); | |
245 | ||
246 | The goal of this transformation is to create a trivially vectorizable loop. | |
247 | This means a parallel loop at the innermost level that has a constant number | |
248 | of iterations corresponding to the target vector width. | |
249 | ||
250 | This transformation creates a loop at the innermost level. The loop has a | |
251 | constant number of iterations, if the number of loop iterations at | |
252 | DimToVectorize can be devided by VectorWidth. The default VectorWidth is | |
253 | currently constant and not yet target specific. This function does not reason | |
254 | about parallelism. */ | |
255 | static isl_map * | |
256 | getPrevectorMap(isl_ctx *ctx, int DimToVectorize, | |
257 | int ScheduleDimensions, | |
258 | int VectorWidth) | |
259 | { | |
260 | isl_space *Space; | |
261 | isl_local_space *LocalSpace, *LocalSpaceRange; | |
262 | isl_set *Modulo; | |
263 | isl_map *TilingMap; | |
264 | isl_constraint *c; | |
265 | isl_aff *Aff; | |
266 | int PointDimension; /* ip */ | |
267 | int TileDimension; /* it */ | |
268 | isl_int VectorWidthMP; | |
269 | int i; | |
270 | ||
271 | /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/ | |
272 | ||
273 | Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1); | |
274 | TilingMap = isl_map_universe(isl_space_copy(Space)); | |
275 | LocalSpace = isl_local_space_from_space(Space); | |
276 | PointDimension = ScheduleDimensions; | |
277 | TileDimension = DimToVectorize; | |
278 | ||
279 | /* Create an identity map for everything except DimToVectorize and map | |
280 | DimToVectorize to the point loop at the innermost dimension. */ | |
281 | for (i = 0; i < ScheduleDimensions; i++) | |
282 | { | |
283 | c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); | |
284 | isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1); | |
285 | ||
286 | if (i == DimToVectorize) | |
287 | isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); | |
288 | else | |
289 | isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1); | |
290 | ||
291 | TilingMap = isl_map_add_constraint(TilingMap, c); | |
292 | } | |
293 | ||
294 | /* it % 'VectorWidth' = 0 */ | |
295 | LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace)); | |
296 | Aff = isl_aff_zero_on_domain(LocalSpaceRange); | |
297 | Aff = isl_aff_set_constant_si(Aff, VectorWidth); | |
298 | Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1); | |
299 | isl_int_init(VectorWidthMP); | |
300 | isl_int_set_si(VectorWidthMP, VectorWidth); | |
301 | Aff = isl_aff_mod(Aff, VectorWidthMP); | |
302 | isl_int_clear(VectorWidthMP); | |
303 | Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff)); | |
304 | TilingMap = isl_map_intersect_range(TilingMap, Modulo); | |
305 | ||
306 | /* it <= ip */ | |
307 | c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); | |
308 | isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1); | |
309 | isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); | |
310 | TilingMap = isl_map_add_constraint(TilingMap, c); | |
311 | ||
312 | /* ip <= it + ('VectorWidth' - 1) */ | |
313 | c = isl_inequality_alloc(LocalSpace); | |
314 | isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1); | |
315 | isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1); | |
316 | isl_constraint_set_constant_si(c, VectorWidth - 1); | |
317 | TilingMap = isl_map_add_constraint(TilingMap, c); | |
318 | ||
319 | isl_map_dump(TilingMap); | |
320 | ||
321 | return TilingMap; | |
322 | } | |
323 | ||
324 | static bool EnablePollyVector = false; | |
325 | ||
326 | /* getScheduleForBandList - Get the scheduling map for a list of bands. | |
327 | ||
328 | We walk recursively the forest of bands to combine the schedules of the | |
329 | individual bands to the overall schedule. In case tiling is requested, | |
330 | the individual bands are tiled. */ | |
331 | static isl_union_map * | |
332 | getScheduleForBandList(isl_band_list *BandList) | |
333 | { | |
334 | int NumBands, i; | |
335 | isl_union_map *Schedule; | |
336 | isl_ctx *ctx; | |
337 | ||
338 | ctx = isl_band_list_get_ctx(BandList); | |
339 | NumBands = isl_band_list_n_band(BandList); | |
340 | Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0)); | |
341 | ||
342 | for (i = 0; i < NumBands; i++) | |
343 | { | |
344 | isl_band *Band; | |
345 | isl_union_map *PartialSchedule; | |
346 | int ScheduleDimensions; | |
347 | isl_space *Space; | |
348 | ||
349 | Band = isl_band_list_get_band(BandList, i); | |
350 | PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions); | |
351 | Space = isl_union_map_get_space(PartialSchedule); | |
352 | ||
353 | if (isl_band_has_children(Band)) | |
354 | { | |
355 | isl_band_list *Children; | |
356 | isl_union_map *SuffixSchedule; | |
357 | ||
358 | Children = isl_band_get_children(Band); | |
359 | SuffixSchedule = getScheduleForBandList(Children); | |
360 | PartialSchedule = isl_union_map_flat_range_product(PartialSchedule, | |
361 | SuffixSchedule); | |
362 | isl_band_list_free(Children); | |
363 | } | |
364 | else if (EnablePollyVector) | |
365 | { | |
366 | for (i = ScheduleDimensions - 1 ; i >= 0 ; i--) | |
367 | { | |
368 | if (isl_band_member_is_zero_distance(Band, i)) | |
369 | { | |
370 | isl_map *TileMap; | |
371 | isl_union_map *TileUMap; | |
372 | ||
373 | TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4); | |
374 | TileUMap = isl_union_map_from_map(TileMap); | |
375 | TileUMap = isl_union_map_align_params(TileUMap, | |
376 | isl_space_copy(Space)); | |
377 | PartialSchedule = isl_union_map_apply_range(PartialSchedule, | |
378 | TileUMap); | |
379 | break; | |
380 | } | |
381 | } | |
382 | } | |
383 | ||
384 | Schedule = isl_union_map_union(Schedule, PartialSchedule); | |
385 | ||
386 | isl_band_free(Band); | |
387 | isl_space_free(Space); | |
388 | } | |
389 | ||
390 | return Schedule; | |
391 | } | |
392 | ||
393 | static isl_union_map * | |
394 | getScheduleMap(isl_schedule *Schedule) | |
395 | { | |
396 | isl_band_list *BandList = isl_schedule_get_band_forest(Schedule); | |
397 | isl_union_map *ScheduleMap = getScheduleForBandList(BandList); | |
398 | isl_band_list_free(BandList); | |
399 | return ScheduleMap; | |
400 | } | |
401 | ||
402 | static int | |
403 | getSingleMap(__isl_take isl_map *map, void *user) | |
404 | { | |
405 | isl_map **singleMap = (isl_map **) user; | |
406 | *singleMap = map; | |
407 | ||
408 | return 0; | |
409 | } | |
410 | ||
411 | static void | |
412 | apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map) | |
413 | { | |
414 | int i; | |
415 | poly_bb_p pbb; | |
416 | ||
f1f41a6c | 417 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
89049f25 | 418 | { |
419 | isl_set *domain = isl_set_copy (pbb->domain); | |
420 | isl_union_map *stmtBand; | |
421 | isl_map *stmtSchedule; | |
422 | ||
423 | stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map), | |
424 | isl_union_set_from_set(domain)); | |
425 | isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule); | |
426 | isl_map_free(pbb->transformed); | |
427 | pbb->transformed = stmtSchedule; | |
428 | isl_union_map_free(stmtBand); | |
429 | } | |
430 | } | |
431 | ||
432 | static const int CONSTANT_BOUND = 20; | |
433 | ||
434 | bool | |
435 | optimize_isl (scop_p scop) | |
436 | { | |
437 | ||
438 | isl_schedule *schedule; | |
439 | isl_union_set *domain; | |
440 | isl_union_map *validity, *proximity, *dependences; | |
441 | isl_union_map *schedule_map; | |
442 | ||
443 | domain = scop_get_domains (scop); | |
444 | dependences = scop_get_dependences (scop); | |
445 | dependences = isl_union_map_gist_domain(dependences, | |
446 | isl_union_set_copy(domain)); | |
447 | dependences = isl_union_map_gist_range(dependences, | |
448 | isl_union_set_copy(domain)); | |
449 | validity = dependences; | |
450 | ||
451 | proximity = isl_union_map_copy (validity); | |
452 | ||
453 | isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND); | |
454 | isl_options_set_schedule_maximize_band_depth(scop->ctx, 1); | |
455 | isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN); | |
456 | isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE); | |
457 | schedule = isl_union_set_compute_schedule (domain, validity, proximity); | |
458 | isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT); | |
459 | ||
460 | if (!schedule) | |
461 | return false; | |
462 | ||
463 | schedule_map = getScheduleMap (schedule); | |
464 | ||
465 | apply_schedule_map_to_scop (scop, schedule_map); | |
466 | ||
467 | isl_schedule_free (schedule); | |
468 | isl_union_map_free (schedule_map); | |
469 | ||
470 | return true; | |
471 | } | |
472 | ||
473 | #endif |