]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/graphite-optimize-isl.c
This patch rewrites the old VEC macro-based interface into a new one
[thirdparty/gcc.git] / gcc / graphite-optimize-isl.c
CommitLineData
89049f25 1/* A scheduling optimizer for Graphite
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22
23#ifdef HAVE_cloog
24#include <isl/set.h>
25#include <isl/map.h>
26#include <isl/union_map.h>
27#include <isl/schedule.h>
28#include <isl/band.h>
29#include <isl/aff.h>
30#include <isl/options.h>
3a889482 31#endif
89049f25 32
33#include "system.h"
34#include "coretypes.h"
35#include "tree-flow.h"
b9ed1410 36#include "dumpfile.h"
89049f25 37#include "cfgloop.h"
38#include "tree-chrec.h"
39#include "tree-data-ref.h"
40#include "tree-scalar-evolution.h"
41#include "sese.h"
42
3a889482 43#ifdef HAVE_cloog
89049f25 44#include "graphite-poly.h"
45
46static isl_union_set *
47scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
48{
49 int i;
50 poly_bb_p pbb;
51 isl_space *space = isl_set_get_space (scop->context);
52 isl_union_set *res = isl_union_set_empty (space);
53
f1f41a6c 54 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
89049f25 55 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
56
57 return res;
58}
59
60static isl_union_map *
61scop_get_dependences (scop_p scop)
62{
63 isl_union_map *dependences;
64
65 if (!scop->must_raw)
66 compute_deps (scop, SCOP_BBS (scop),
67 &scop->must_raw, &scop->may_raw,
68 &scop->must_raw_no_source, &scop->may_raw_no_source,
69 &scop->must_war, &scop->may_war,
70 &scop->must_war_no_source, &scop->may_war_no_source,
71 &scop->must_waw, &scop->may_waw,
72 &scop->must_waw_no_source, &scop->may_waw_no_source);
73
74 dependences = isl_union_map_copy (scop->must_raw);
75 dependences = isl_union_map_union (dependences,
76 isl_union_map_copy (scop->must_war));
77 dependences = isl_union_map_union (dependences,
78 isl_union_map_copy (scop->must_waw));
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->may_raw));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->may_war));
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->may_waw));
85
86 return dependences;
87}
88
89/* getTileMap - Create a map that describes a n-dimensonal tiling.
90
91 getTileMap creates a map from a n-dimensional scattering space into an
92 2*n-dimensional scattering space. The map describes a rectangular tiling.
93
94 Example:
95 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
96
97 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
98 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
99 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
100
101 Before tiling:
102
103 for (i = 0; i < N; i++)
104 for (j = 0; j < M; j++)
105 S(i,j)
106
107 After tiling:
108
109 for (t_i = 0; t_i < N; i+=32)
110 for (t_j = 0; t_j < M; j+=32)
111 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
112 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
113 S(i,j)
114 */
115
116static isl_basic_map *
117getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
118{
119 int x;
120 /* We construct
121
122 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
123 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
124 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
125
126 and project out the auxilary dimensions a0 and a1. */
127 isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
128 scheduleDimensions * 3);
129 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
130
131 isl_local_space *LocalSpace = isl_local_space_from_space(Space);
132
133 for (x = 0; x < scheduleDimensions; x++)
134 {
135 int sX = x;
136 int tX = x;
137 int pX = scheduleDimensions + x;
138 int aX = 2 * scheduleDimensions + x;
139
140 isl_constraint *c;
141
142 /* sX = aX * tileSize; */
143 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
144 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
145 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
146 tileMap = isl_basic_map_add_constraint(tileMap, c);
147
148 /* pX = sX; */
149 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
150 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
151 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
152 tileMap = isl_basic_map_add_constraint(tileMap, c);
153
154 /* tX <= pX */
155 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
156 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
157 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
158 tileMap = isl_basic_map_add_constraint(tileMap, c);
159
160 /* pX <= tX + (tileSize - 1) */
161 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
162 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
163 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
164 isl_constraint_set_constant_si(c, tileSize - 1);
165 tileMap = isl_basic_map_add_constraint(tileMap, c);
166 }
167
168 /* Project out auxilary dimensions.
169
170 The auxilary dimensions are transformed into existentially quantified ones.
171 This reduces the number of visible scattering dimensions and allows Cloog
172 to produces better code. */
173 tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
174 2 * scheduleDimensions,
175 scheduleDimensions);
176 isl_local_space_free(LocalSpace);
177 return tileMap;
178}
179
180/* getScheduleForBand - Get the schedule for this band.
181
182 Polly applies transformations like tiling on top of the isl calculated value.
183 This can influence the number of scheduling dimension. The number of
184 schedule dimensions is returned in the parameter 'Dimension'. */
185static bool DisableTiling = false;
186
187static isl_union_map *
188getScheduleForBand(isl_band *Band, int *Dimensions)
189{
190 isl_union_map *PartialSchedule;
191 isl_ctx *ctx;
192 isl_space *Space;
193 isl_basic_map *TileMap;
194 isl_union_map *TileUMap;
195
196 PartialSchedule = isl_band_get_partial_schedule(Band);
197 *Dimensions = isl_band_n_member(Band);
198
199 if (DisableTiling)
200 return PartialSchedule;
201
202 /* It does not make any sense to tile a band with just one dimension. */
203 if (*Dimensions == 1)
204 return PartialSchedule;
205
206 ctx = isl_union_map_get_ctx(PartialSchedule);
207 Space = isl_union_map_get_space(PartialSchedule);
208
209 TileMap = getTileMap(ctx, *Dimensions, 32);
210 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
211 TileUMap = isl_union_map_align_params(TileUMap, Space);
212 *Dimensions = 2 * *Dimensions;
213
214 return isl_union_map_apply_range(PartialSchedule, TileUMap);
215}
216
217/* Create a map that pre-vectorizes one scheduling dimension.
218
219 getPrevectorMap creates a map that maps each input dimension to the same
220 output dimension, except for the dimension DimToVectorize. DimToVectorize is
221 strip mined by 'VectorWidth' and the newly created point loop of
222 DimToVectorize is moved to the innermost level.
223
224 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
225
226 | Before transformation
227 |
228 | A[i,j] -> [i,j]
229 |
230 | for (i = 0; i < 128; i++)
231 | for (j = 0; j < 128; j++)
232 | A(i,j);
233
234 Prevector map:
235 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
236
237 | After transformation:
238 |
239 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240 |
241 | for (it = 0; it < 128; it+=4)
242 | for (j = 0; j < 128; j++)
243 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
244 | A(ip,j);
245
246 The goal of this transformation is to create a trivially vectorizable loop.
247 This means a parallel loop at the innermost level that has a constant number
248 of iterations corresponding to the target vector width.
249
250 This transformation creates a loop at the innermost level. The loop has a
251 constant number of iterations, if the number of loop iterations at
252 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
253 currently constant and not yet target specific. This function does not reason
254 about parallelism. */
255static isl_map *
256getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
257 int ScheduleDimensions,
258 int VectorWidth)
259{
260 isl_space *Space;
261 isl_local_space *LocalSpace, *LocalSpaceRange;
262 isl_set *Modulo;
263 isl_map *TilingMap;
264 isl_constraint *c;
265 isl_aff *Aff;
266 int PointDimension; /* ip */
267 int TileDimension; /* it */
268 isl_int VectorWidthMP;
269 int i;
270
271 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
272
273 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
274 TilingMap = isl_map_universe(isl_space_copy(Space));
275 LocalSpace = isl_local_space_from_space(Space);
276 PointDimension = ScheduleDimensions;
277 TileDimension = DimToVectorize;
278
279 /* Create an identity map for everything except DimToVectorize and map
280 DimToVectorize to the point loop at the innermost dimension. */
281 for (i = 0; i < ScheduleDimensions; i++)
282 {
283 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
284 isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
285
286 if (i == DimToVectorize)
287 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
288 else
289 isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
290
291 TilingMap = isl_map_add_constraint(TilingMap, c);
292 }
293
294 /* it % 'VectorWidth' = 0 */
295 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
296 Aff = isl_aff_zero_on_domain(LocalSpaceRange);
297 Aff = isl_aff_set_constant_si(Aff, VectorWidth);
298 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
299 isl_int_init(VectorWidthMP);
300 isl_int_set_si(VectorWidthMP, VectorWidth);
301 Aff = isl_aff_mod(Aff, VectorWidthMP);
302 isl_int_clear(VectorWidthMP);
303 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
304 TilingMap = isl_map_intersect_range(TilingMap, Modulo);
305
306 /* it <= ip */
307 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
308 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
309 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
310 TilingMap = isl_map_add_constraint(TilingMap, c);
311
312 /* ip <= it + ('VectorWidth' - 1) */
313 c = isl_inequality_alloc(LocalSpace);
314 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
315 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
316 isl_constraint_set_constant_si(c, VectorWidth - 1);
317 TilingMap = isl_map_add_constraint(TilingMap, c);
318
319 isl_map_dump(TilingMap);
320
321 return TilingMap;
322}
323
324static bool EnablePollyVector = false;
325
326/* getScheduleForBandList - Get the scheduling map for a list of bands.
327
328 We walk recursively the forest of bands to combine the schedules of the
329 individual bands to the overall schedule. In case tiling is requested,
330 the individual bands are tiled. */
331static isl_union_map *
332getScheduleForBandList(isl_band_list *BandList)
333{
334 int NumBands, i;
335 isl_union_map *Schedule;
336 isl_ctx *ctx;
337
338 ctx = isl_band_list_get_ctx(BandList);
339 NumBands = isl_band_list_n_band(BandList);
340 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
341
342 for (i = 0; i < NumBands; i++)
343 {
344 isl_band *Band;
345 isl_union_map *PartialSchedule;
346 int ScheduleDimensions;
347 isl_space *Space;
348
349 Band = isl_band_list_get_band(BandList, i);
350 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
351 Space = isl_union_map_get_space(PartialSchedule);
352
353 if (isl_band_has_children(Band))
354 {
355 isl_band_list *Children;
356 isl_union_map *SuffixSchedule;
357
358 Children = isl_band_get_children(Band);
359 SuffixSchedule = getScheduleForBandList(Children);
360 PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
361 SuffixSchedule);
362 isl_band_list_free(Children);
363 }
364 else if (EnablePollyVector)
365 {
366 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
367 {
368 if (isl_band_member_is_zero_distance(Band, i))
369 {
370 isl_map *TileMap;
371 isl_union_map *TileUMap;
372
373 TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
374 TileUMap = isl_union_map_from_map(TileMap);
375 TileUMap = isl_union_map_align_params(TileUMap,
376 isl_space_copy(Space));
377 PartialSchedule = isl_union_map_apply_range(PartialSchedule,
378 TileUMap);
379 break;
380 }
381 }
382 }
383
384 Schedule = isl_union_map_union(Schedule, PartialSchedule);
385
386 isl_band_free(Band);
387 isl_space_free(Space);
388 }
389
390 return Schedule;
391}
392
393static isl_union_map *
394getScheduleMap(isl_schedule *Schedule)
395{
396 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
397 isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
398 isl_band_list_free(BandList);
399 return ScheduleMap;
400}
401
402static int
403getSingleMap(__isl_take isl_map *map, void *user)
404{
405 isl_map **singleMap = (isl_map **) user;
406 *singleMap = map;
407
408 return 0;
409}
410
411static void
412apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
413{
414 int i;
415 poly_bb_p pbb;
416
f1f41a6c 417 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
89049f25 418 {
419 isl_set *domain = isl_set_copy (pbb->domain);
420 isl_union_map *stmtBand;
421 isl_map *stmtSchedule;
422
423 stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
424 isl_union_set_from_set(domain));
425 isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
426 isl_map_free(pbb->transformed);
427 pbb->transformed = stmtSchedule;
428 isl_union_map_free(stmtBand);
429 }
430}
431
432static const int CONSTANT_BOUND = 20;
433
434bool
435optimize_isl (scop_p scop)
436{
437
438 isl_schedule *schedule;
439 isl_union_set *domain;
440 isl_union_map *validity, *proximity, *dependences;
441 isl_union_map *schedule_map;
442
443 domain = scop_get_domains (scop);
444 dependences = scop_get_dependences (scop);
445 dependences = isl_union_map_gist_domain(dependences,
446 isl_union_set_copy(domain));
447 dependences = isl_union_map_gist_range(dependences,
448 isl_union_set_copy(domain));
449 validity = dependences;
450
451 proximity = isl_union_map_copy (validity);
452
453 isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
454 isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
455 isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
456 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
457 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
458 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
459
460 if (!schedule)
461 return false;
462
463 schedule_map = getScheduleMap (schedule);
464
465 apply_schedule_map_to_scop (scop, schedule_map);
466
467 isl_schedule_free (schedule);
468 isl_union_map_free (schedule_map);
469
470 return true;
471}
472
473#endif