]>
Commit | Line | Data |
---|---|---|
b60cc080 | 1 | /* A scheduling optimizer for Graphite |
23a5b65a | 2 | Copyright (C) 2012-2014 Free Software Foundation, Inc. |
b60cc080 TG |
3 | Contributed by Tobias Grosser <tobias@grosser.es>. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | ||
eae1a5d4 | 23 | #ifdef HAVE_isl |
b60cc080 TG |
24 | #include <isl/set.h> |
25 | #include <isl/map.h> | |
26 | #include <isl/union_map.h> | |
27 | #include <isl/schedule.h> | |
28 | #include <isl/band.h> | |
29 | #include <isl/aff.h> | |
30 | #include <isl/options.h> | |
02663f24 | 31 | #endif |
b60cc080 TG |
32 | |
33 | #include "system.h" | |
34 | #include "coretypes.h" | |
4d648807 | 35 | #include "tree.h" |
60393bbc AM |
36 | #include "predict.h" |
37 | #include "vec.h" | |
38 | #include "hashtab.h" | |
39 | #include "hash-set.h" | |
40 | #include "machmode.h" | |
41 | #include "tm.h" | |
42 | #include "hard-reg-set.h" | |
43 | #include "input.h" | |
44 | #include "function.h" | |
45 | #include "dominance.h" | |
46 | #include "cfg.h" | |
2fb9a547 AM |
47 | #include "basic-block.h" |
48 | #include "tree-ssa-alias.h" | |
49 | #include "internal-fn.h" | |
50 | #include "gimple-expr.h" | |
51 | #include "is-a.h" | |
442b4905 | 52 | #include "gimple.h" |
5be5c238 | 53 | #include "gimple-iterator.h" |
442b4905 | 54 | #include "tree-ssa-loop.h" |
7ee2468b | 55 | #include "dumpfile.h" |
b60cc080 TG |
56 | #include "cfgloop.h" |
57 | #include "tree-chrec.h" | |
58 | #include "tree-data-ref.h" | |
59 | #include "tree-scalar-evolution.h" | |
60 | #include "sese.h" | |
61 | ||
eae1a5d4 | 62 | #ifdef HAVE_isl |
b60cc080 TG |
63 | #include "graphite-poly.h" |
64 | ||
65 | static isl_union_set * | |
66 | scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) | |
67 | { | |
68 | int i; | |
69 | poly_bb_p pbb; | |
70 | isl_space *space = isl_set_get_space (scop->context); | |
71 | isl_union_set *res = isl_union_set_empty (space); | |
72 | ||
9771b263 | 73 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
b60cc080 TG |
74 | res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); |
75 | ||
76 | return res; | |
77 | } | |
78 | ||
b60cc080 TG |
79 | /* getTileMap - Create a map that describes a n-dimensonal tiling. |
80 | ||
81 | getTileMap creates a map from a n-dimensional scattering space into an | |
82 | 2*n-dimensional scattering space. The map describes a rectangular tiling. | |
83 | ||
84 | Example: | |
85 | scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 | |
86 | ||
87 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: | |
88 | t0 % 32 = 0 and t0 <= s0 < t0 + 32 and | |
89 | t1 % 32 = 0 and t1 <= s1 < t1 + 32} | |
90 | ||
91 | Before tiling: | |
92 | ||
93 | for (i = 0; i < N; i++) | |
94 | for (j = 0; j < M; j++) | |
95 | S(i,j) | |
96 | ||
97 | After tiling: | |
98 | ||
99 | for (t_i = 0; t_i < N; i+=32) | |
100 | for (t_j = 0; t_j < M; j+=32) | |
101 | for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 | |
102 | for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0 | |
103 | S(i,j) | |
104 | */ | |
105 | ||
106 | static isl_basic_map * | |
c3284718 | 107 | getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize) |
b60cc080 TG |
108 | { |
109 | int x; | |
110 | /* We construct | |
111 | ||
112 | tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: | |
113 | s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and | |
114 | s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} | |
115 | ||
116 | and project out the auxilary dimensions a0 and a1. */ | |
c3284718 RS |
117 | isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions, |
118 | scheduleDimensions * 3); | |
119 | isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space)); | |
b60cc080 | 120 | |
c3284718 | 121 | isl_local_space *LocalSpace = isl_local_space_from_space (Space); |
b60cc080 TG |
122 | |
123 | for (x = 0; x < scheduleDimensions; x++) | |
124 | { | |
125 | int sX = x; | |
126 | int tX = x; | |
127 | int pX = scheduleDimensions + x; | |
128 | int aX = 2 * scheduleDimensions + x; | |
129 | ||
130 | isl_constraint *c; | |
131 | ||
132 | /* sX = aX * tileSize; */ | |
c3284718 RS |
133 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
134 | isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1); | |
135 | isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize); | |
136 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
137 | |
138 | /* pX = sX; */ | |
c3284718 RS |
139 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
140 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); | |
141 | isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1); | |
142 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
143 | |
144 | /* tX <= pX */ | |
c3284718 RS |
145 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
146 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); | |
147 | isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1); | |
148 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
149 | |
150 | /* pX <= tX + (tileSize - 1) */ | |
c3284718 RS |
151 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
152 | isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1); | |
153 | isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1); | |
154 | isl_constraint_set_constant_si (c, tileSize - 1); | |
155 | tileMap = isl_basic_map_add_constraint (tileMap, c); | |
b60cc080 TG |
156 | } |
157 | ||
688010ba | 158 | /* Project out auxiliary dimensions. |
b60cc080 | 159 | |
688010ba | 160 | The auxiliary dimensions are transformed into existentially quantified ones. |
b60cc080 TG |
161 | This reduces the number of visible scattering dimensions and allows Cloog |
162 | to produces better code. */ | |
c3284718 RS |
163 | tileMap = isl_basic_map_project_out (tileMap, isl_dim_out, |
164 | 2 * scheduleDimensions, | |
165 | scheduleDimensions); | |
166 | isl_local_space_free (LocalSpace); | |
b60cc080 TG |
167 | return tileMap; |
168 | } | |
169 | ||
170 | /* getScheduleForBand - Get the schedule for this band. | |
171 | ||
172 | Polly applies transformations like tiling on top of the isl calculated value. | |
173 | This can influence the number of scheduling dimension. The number of | |
174 | schedule dimensions is returned in the parameter 'Dimension'. */ | |
175 | static bool DisableTiling = false; | |
176 | ||
177 | static isl_union_map * | |
c3284718 | 178 | getScheduleForBand (isl_band *Band, int *Dimensions) |
b60cc080 TG |
179 | { |
180 | isl_union_map *PartialSchedule; | |
181 | isl_ctx *ctx; | |
182 | isl_space *Space; | |
183 | isl_basic_map *TileMap; | |
184 | isl_union_map *TileUMap; | |
185 | ||
c3284718 RS |
186 | PartialSchedule = isl_band_get_partial_schedule (Band); |
187 | *Dimensions = isl_band_n_member (Band); | |
b60cc080 TG |
188 | |
189 | if (DisableTiling) | |
190 | return PartialSchedule; | |
191 | ||
192 | /* It does not make any sense to tile a band with just one dimension. */ | |
193 | if (*Dimensions == 1) | |
194 | return PartialSchedule; | |
195 | ||
c3284718 RS |
196 | ctx = isl_union_map_get_ctx (PartialSchedule); |
197 | Space = isl_union_map_get_space (PartialSchedule); | |
b60cc080 | 198 | |
c3284718 RS |
199 | TileMap = getTileMap (ctx, *Dimensions, 32); |
200 | TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap)); | |
201 | TileUMap = isl_union_map_align_params (TileUMap, Space); | |
b60cc080 TG |
202 | *Dimensions = 2 * *Dimensions; |
203 | ||
c3284718 | 204 | return isl_union_map_apply_range (PartialSchedule, TileUMap); |
b60cc080 TG |
205 | } |
206 | ||
207 | /* Create a map that pre-vectorizes one scheduling dimension. | |
208 | ||
209 | getPrevectorMap creates a map that maps each input dimension to the same | |
210 | output dimension, except for the dimension DimToVectorize. DimToVectorize is | |
211 | strip mined by 'VectorWidth' and the newly created point loop of | |
212 | DimToVectorize is moved to the innermost level. | |
213 | ||
214 | Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4): | |
215 | ||
216 | | Before transformation | |
217 | | | |
218 | | A[i,j] -> [i,j] | |
219 | | | |
220 | | for (i = 0; i < 128; i++) | |
221 | | for (j = 0; j < 128; j++) | |
222 | | A(i,j); | |
223 | ||
224 | Prevector map: | |
225 | [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
226 | ||
227 | | After transformation: | |
228 | | | |
229 | | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip | |
230 | | | |
231 | | for (it = 0; it < 128; it+=4) | |
232 | | for (j = 0; j < 128; j++) | |
233 | | for (ip = max(0,it); ip < min(128, it + 3); ip++) | |
234 | | A(ip,j); | |
235 | ||
236 | The goal of this transformation is to create a trivially vectorizable loop. | |
237 | This means a parallel loop at the innermost level that has a constant number | |
238 | of iterations corresponding to the target vector width. | |
239 | ||
240 | This transformation creates a loop at the innermost level. The loop has a | |
241 | constant number of iterations, if the number of loop iterations at | |
242 | DimToVectorize can be devided by VectorWidth. The default VectorWidth is | |
243 | currently constant and not yet target specific. This function does not reason | |
244 | about parallelism. */ | |
245 | static isl_map * | |
c3284718 RS |
246 | getPrevectorMap (isl_ctx *ctx, int DimToVectorize, |
247 | int ScheduleDimensions, | |
248 | int VectorWidth) | |
b60cc080 TG |
249 | { |
250 | isl_space *Space; | |
251 | isl_local_space *LocalSpace, *LocalSpaceRange; | |
252 | isl_set *Modulo; | |
253 | isl_map *TilingMap; | |
254 | isl_constraint *c; | |
255 | isl_aff *Aff; | |
256 | int PointDimension; /* ip */ | |
257 | int TileDimension; /* it */ | |
b47595f7 | 258 | isl_val *VectorWidthMP; |
b60cc080 TG |
259 | int i; |
260 | ||
261 | /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/ | |
262 | ||
c3284718 RS |
263 | Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1); |
264 | TilingMap = isl_map_universe (isl_space_copy (Space)); | |
265 | LocalSpace = isl_local_space_from_space (Space); | |
b60cc080 TG |
266 | PointDimension = ScheduleDimensions; |
267 | TileDimension = DimToVectorize; | |
268 | ||
269 | /* Create an identity map for everything except DimToVectorize and map | |
270 | DimToVectorize to the point loop at the innermost dimension. */ | |
271 | for (i = 0; i < ScheduleDimensions; i++) | |
272 | { | |
c3284718 RS |
273 | c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); |
274 | isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1); | |
b60cc080 TG |
275 | |
276 | if (i == DimToVectorize) | |
c3284718 | 277 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1); |
b60cc080 | 278 | else |
c3284718 | 279 | isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1); |
b60cc080 | 280 | |
c3284718 | 281 | TilingMap = isl_map_add_constraint (TilingMap, c); |
b60cc080 TG |
282 | } |
283 | ||
284 | /* it % 'VectorWidth' = 0 */ | |
c3284718 RS |
285 | LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace)); |
286 | Aff = isl_aff_zero_on_domain (LocalSpaceRange); | |
287 | Aff = isl_aff_set_constant_si (Aff, VectorWidth); | |
288 | Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1); | |
b47595f7 MN |
289 | |
290 | VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth); | |
291 | Aff = isl_aff_mod_val (Aff, VectorWidthMP); | |
c3284718 RS |
292 | Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff)); |
293 | TilingMap = isl_map_intersect_range (TilingMap, Modulo); | |
b60cc080 TG |
294 | |
295 | /* it <= ip */ | |
c3284718 RS |
296 | c = isl_inequality_alloc (isl_local_space_copy (LocalSpace)); |
297 | isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1); | |
298 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1); | |
299 | TilingMap = isl_map_add_constraint (TilingMap, c); | |
b60cc080 TG |
300 | |
301 | /* ip <= it + ('VectorWidth' - 1) */ | |
c3284718 RS |
302 | c = isl_inequality_alloc (LocalSpace); |
303 | isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1); | |
304 | isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1); | |
305 | isl_constraint_set_constant_si (c, VectorWidth - 1); | |
306 | TilingMap = isl_map_add_constraint (TilingMap, c); | |
b60cc080 | 307 | |
c3284718 | 308 | isl_map_dump (TilingMap); |
b60cc080 TG |
309 | |
310 | return TilingMap; | |
311 | } | |
312 | ||
313 | static bool EnablePollyVector = false; | |
314 | ||
315 | /* getScheduleForBandList - Get the scheduling map for a list of bands. | |
316 | ||
317 | We walk recursively the forest of bands to combine the schedules of the | |
318 | individual bands to the overall schedule. In case tiling is requested, | |
319 | the individual bands are tiled. */ | |
320 | static isl_union_map * | |
c3284718 | 321 | getScheduleForBandList (isl_band_list *BandList) |
b60cc080 TG |
322 | { |
323 | int NumBands, i; | |
324 | isl_union_map *Schedule; | |
325 | isl_ctx *ctx; | |
326 | ||
c3284718 RS |
327 | ctx = isl_band_list_get_ctx (BandList); |
328 | NumBands = isl_band_list_n_band (BandList); | |
329 | Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0)); | |
b60cc080 TG |
330 | |
331 | for (i = 0; i < NumBands; i++) | |
332 | { | |
333 | isl_band *Band; | |
334 | isl_union_map *PartialSchedule; | |
335 | int ScheduleDimensions; | |
336 | isl_space *Space; | |
337 | ||
c3284718 RS |
338 | Band = isl_band_list_get_band (BandList, i); |
339 | PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions); | |
340 | Space = isl_union_map_get_space (PartialSchedule); | |
b60cc080 | 341 | |
c3284718 | 342 | if (isl_band_has_children (Band)) |
b60cc080 TG |
343 | { |
344 | isl_band_list *Children; | |
345 | isl_union_map *SuffixSchedule; | |
346 | ||
c3284718 RS |
347 | Children = isl_band_get_children (Band); |
348 | SuffixSchedule = getScheduleForBandList (Children); | |
349 | PartialSchedule = isl_union_map_flat_range_product (PartialSchedule, | |
350 | SuffixSchedule); | |
351 | isl_band_list_free (Children); | |
b60cc080 TG |
352 | } |
353 | else if (EnablePollyVector) | |
354 | { | |
355 | for (i = ScheduleDimensions - 1 ; i >= 0 ; i--) | |
356 | { | |
c3284718 | 357 | if (isl_band_member_is_zero_distance (Band, i)) |
b60cc080 TG |
358 | { |
359 | isl_map *TileMap; | |
360 | isl_union_map *TileUMap; | |
361 | ||
c3284718 RS |
362 | TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4); |
363 | TileUMap = isl_union_map_from_map (TileMap); | |
364 | TileUMap = isl_union_map_align_params | |
365 | (TileUMap, isl_space_copy (Space)); | |
366 | PartialSchedule = isl_union_map_apply_range | |
367 | (PartialSchedule, TileUMap); | |
b60cc080 TG |
368 | break; |
369 | } | |
370 | } | |
371 | } | |
372 | ||
c3284718 | 373 | Schedule = isl_union_map_union (Schedule, PartialSchedule); |
b60cc080 | 374 | |
c3284718 RS |
375 | isl_band_free (Band); |
376 | isl_space_free (Space); | |
b60cc080 TG |
377 | } |
378 | ||
379 | return Schedule; | |
380 | } | |
381 | ||
382 | static isl_union_map * | |
c3284718 | 383 | getScheduleMap (isl_schedule *Schedule) |
b60cc080 | 384 | { |
c3284718 RS |
385 | isl_band_list *BandList = isl_schedule_get_band_forest (Schedule); |
386 | isl_union_map *ScheduleMap = getScheduleForBandList (BandList); | |
387 | isl_band_list_free (BandList); | |
b60cc080 TG |
388 | return ScheduleMap; |
389 | } | |
390 | ||
391 | static int | |
c3284718 | 392 | getSingleMap (__isl_take isl_map *map, void *user) |
b60cc080 TG |
393 | { |
394 | isl_map **singleMap = (isl_map **) user; | |
395 | *singleMap = map; | |
396 | ||
397 | return 0; | |
398 | } | |
399 | ||
400 | static void | |
401 | apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map) | |
402 | { | |
403 | int i; | |
404 | poly_bb_p pbb; | |
405 | ||
9771b263 | 406 | FOR_EACH_VEC_ELT (scop->bbs, i, pbb) |
b60cc080 TG |
407 | { |
408 | isl_set *domain = isl_set_copy (pbb->domain); | |
409 | isl_union_map *stmtBand; | |
410 | isl_map *stmtSchedule; | |
411 | ||
c3284718 RS |
412 | stmtBand = isl_union_map_intersect_domain |
413 | (isl_union_map_copy (schedule_map), | |
414 | isl_union_set_from_set (domain)); | |
415 | isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule); | |
416 | isl_map_free (pbb->transformed); | |
b60cc080 | 417 | pbb->transformed = stmtSchedule; |
c3284718 | 418 | isl_union_map_free (stmtBand); |
b60cc080 TG |
419 | } |
420 | } | |
421 | ||
422 | static const int CONSTANT_BOUND = 20; | |
423 | ||
424 | bool | |
425 | optimize_isl (scop_p scop) | |
426 | { | |
427 | ||
428 | isl_schedule *schedule; | |
429 | isl_union_set *domain; | |
430 | isl_union_map *validity, *proximity, *dependences; | |
431 | isl_union_map *schedule_map; | |
432 | ||
433 | domain = scop_get_domains (scop); | |
434 | dependences = scop_get_dependences (scop); | |
c3284718 RS |
435 | dependences = isl_union_map_gist_domain (dependences, |
436 | isl_union_set_copy (domain)); | |
437 | dependences = isl_union_map_gist_range (dependences, | |
438 | isl_union_set_copy (domain)); | |
b60cc080 TG |
439 | validity = dependences; |
440 | ||
441 | proximity = isl_union_map_copy (validity); | |
442 | ||
c3284718 RS |
443 | isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND); |
444 | isl_options_set_schedule_maximize_band_depth (scop->ctx, 1); | |
445 | isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN); | |
446 | isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE); | |
b60cc080 | 447 | schedule = isl_union_set_compute_schedule (domain, validity, proximity); |
c3284718 | 448 | isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT); |
b60cc080 TG |
449 | |
450 | if (!schedule) | |
451 | return false; | |
452 | ||
453 | schedule_map = getScheduleMap (schedule); | |
454 | ||
455 | apply_schedule_map_to_scop (scop, schedule_map); | |
456 | ||
457 | isl_schedule_free (schedule); | |
458 | isl_union_map_free (schedule_map); | |
459 | ||
460 | return true; | |
461 | } | |
462 | ||
463 | #endif |