]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/graphite-optimize-isl.c
[PATCH 3/7] OpenMP 4.0 offloading infrastructure: Offload tables.
[thirdparty/gcc.git] / gcc / graphite-optimize-isl.c
CommitLineData
b60cc080 1/* A scheduling optimizer for Graphite
23a5b65a 2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
b60cc080
TG
3 Contributed by Tobias Grosser <tobias@grosser.es>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22
eae1a5d4 23#ifdef HAVE_isl
b60cc080
TG
24#include <isl/set.h>
25#include <isl/map.h>
26#include <isl/union_map.h>
27#include <isl/schedule.h>
28#include <isl/band.h>
29#include <isl/aff.h>
30#include <isl/options.h>
02663f24 31#endif
b60cc080
TG
32
33#include "system.h"
34#include "coretypes.h"
4d648807 35#include "tree.h"
60393bbc
AM
36#include "predict.h"
37#include "vec.h"
38#include "hashtab.h"
39#include "hash-set.h"
40#include "machmode.h"
41#include "tm.h"
42#include "hard-reg-set.h"
43#include "input.h"
44#include "function.h"
45#include "dominance.h"
46#include "cfg.h"
2fb9a547
AM
47#include "basic-block.h"
48#include "tree-ssa-alias.h"
49#include "internal-fn.h"
50#include "gimple-expr.h"
51#include "is-a.h"
442b4905 52#include "gimple.h"
5be5c238 53#include "gimple-iterator.h"
442b4905 54#include "tree-ssa-loop.h"
7ee2468b 55#include "dumpfile.h"
b60cc080
TG
56#include "cfgloop.h"
57#include "tree-chrec.h"
58#include "tree-data-ref.h"
59#include "tree-scalar-evolution.h"
60#include "sese.h"
61
eae1a5d4 62#ifdef HAVE_isl
b60cc080
TG
63#include "graphite-poly.h"
64
65static isl_union_set *
66scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
67{
68 int i;
69 poly_bb_p pbb;
70 isl_space *space = isl_set_get_space (scop->context);
71 isl_union_set *res = isl_union_set_empty (space);
72
9771b263 73 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
b60cc080
TG
74 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
75
76 return res;
77}
78
b60cc080
TG
79/* getTileMap - Create a map that describes a n-dimensonal tiling.
80
81 getTileMap creates a map from a n-dimensional scattering space into an
82 2*n-dimensional scattering space. The map describes a rectangular tiling.
83
84 Example:
85 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
86
87 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
88 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
89 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
90
91 Before tiling:
92
93 for (i = 0; i < N; i++)
94 for (j = 0; j < M; j++)
95 S(i,j)
96
97 After tiling:
98
99 for (t_i = 0; t_i < N; i+=32)
100 for (t_j = 0; t_j < M; j+=32)
101 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
102 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
103 S(i,j)
104 */
105
106static isl_basic_map *
c3284718 107getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
b60cc080
TG
108{
109 int x;
110 /* We construct
111
112 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
113 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
114 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
115
116 and project out the auxilary dimensions a0 and a1. */
c3284718
RS
117 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
118 scheduleDimensions * 3);
119 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
b60cc080 120
c3284718 121 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
b60cc080
TG
122
123 for (x = 0; x < scheduleDimensions; x++)
124 {
125 int sX = x;
126 int tX = x;
127 int pX = scheduleDimensions + x;
128 int aX = 2 * scheduleDimensions + x;
129
130 isl_constraint *c;
131
132 /* sX = aX * tileSize; */
c3284718
RS
133 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
134 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
135 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
136 tileMap = isl_basic_map_add_constraint (tileMap, c);
b60cc080
TG
137
138 /* pX = sX; */
c3284718
RS
139 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
140 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
141 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
142 tileMap = isl_basic_map_add_constraint (tileMap, c);
b60cc080
TG
143
144 /* tX <= pX */
c3284718
RS
145 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
146 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
147 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
148 tileMap = isl_basic_map_add_constraint (tileMap, c);
b60cc080
TG
149
150 /* pX <= tX + (tileSize - 1) */
c3284718
RS
151 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
152 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
153 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
154 isl_constraint_set_constant_si (c, tileSize - 1);
155 tileMap = isl_basic_map_add_constraint (tileMap, c);
b60cc080
TG
156 }
157
688010ba 158 /* Project out auxiliary dimensions.
b60cc080 159
688010ba 160 The auxiliary dimensions are transformed into existentially quantified ones.
b60cc080
TG
161 This reduces the number of visible scattering dimensions and allows Cloog
162 to produces better code. */
c3284718
RS
163 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
164 2 * scheduleDimensions,
165 scheduleDimensions);
166 isl_local_space_free (LocalSpace);
b60cc080
TG
167 return tileMap;
168}
169
170/* getScheduleForBand - Get the schedule for this band.
171
172 Polly applies transformations like tiling on top of the isl calculated value.
173 This can influence the number of scheduling dimension. The number of
174 schedule dimensions is returned in the parameter 'Dimension'. */
175static bool DisableTiling = false;
176
177static isl_union_map *
c3284718 178getScheduleForBand (isl_band *Band, int *Dimensions)
b60cc080
TG
179{
180 isl_union_map *PartialSchedule;
181 isl_ctx *ctx;
182 isl_space *Space;
183 isl_basic_map *TileMap;
184 isl_union_map *TileUMap;
185
c3284718
RS
186 PartialSchedule = isl_band_get_partial_schedule (Band);
187 *Dimensions = isl_band_n_member (Band);
b60cc080
TG
188
189 if (DisableTiling)
190 return PartialSchedule;
191
192 /* It does not make any sense to tile a band with just one dimension. */
193 if (*Dimensions == 1)
194 return PartialSchedule;
195
c3284718
RS
196 ctx = isl_union_map_get_ctx (PartialSchedule);
197 Space = isl_union_map_get_space (PartialSchedule);
b60cc080 198
c3284718
RS
199 TileMap = getTileMap (ctx, *Dimensions, 32);
200 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
201 TileUMap = isl_union_map_align_params (TileUMap, Space);
b60cc080
TG
202 *Dimensions = 2 * *Dimensions;
203
c3284718 204 return isl_union_map_apply_range (PartialSchedule, TileUMap);
b60cc080
TG
205}
206
207/* Create a map that pre-vectorizes one scheduling dimension.
208
209 getPrevectorMap creates a map that maps each input dimension to the same
210 output dimension, except for the dimension DimToVectorize. DimToVectorize is
211 strip mined by 'VectorWidth' and the newly created point loop of
212 DimToVectorize is moved to the innermost level.
213
214 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
215
216 | Before transformation
217 |
218 | A[i,j] -> [i,j]
219 |
220 | for (i = 0; i < 128; i++)
221 | for (j = 0; j < 128; j++)
222 | A(i,j);
223
224 Prevector map:
225 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
226
227 | After transformation:
228 |
229 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
230 |
231 | for (it = 0; it < 128; it+=4)
232 | for (j = 0; j < 128; j++)
233 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
234 | A(ip,j);
235
236 The goal of this transformation is to create a trivially vectorizable loop.
237 This means a parallel loop at the innermost level that has a constant number
238 of iterations corresponding to the target vector width.
239
240 This transformation creates a loop at the innermost level. The loop has a
241 constant number of iterations, if the number of loop iterations at
242 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
243 currently constant and not yet target specific. This function does not reason
244 about parallelism. */
245static isl_map *
c3284718
RS
246getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
247 int ScheduleDimensions,
248 int VectorWidth)
b60cc080
TG
249{
250 isl_space *Space;
251 isl_local_space *LocalSpace, *LocalSpaceRange;
252 isl_set *Modulo;
253 isl_map *TilingMap;
254 isl_constraint *c;
255 isl_aff *Aff;
256 int PointDimension; /* ip */
257 int TileDimension; /* it */
b47595f7 258 isl_val *VectorWidthMP;
b60cc080
TG
259 int i;
260
261 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
262
c3284718
RS
263 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
264 TilingMap = isl_map_universe (isl_space_copy (Space));
265 LocalSpace = isl_local_space_from_space (Space);
b60cc080
TG
266 PointDimension = ScheduleDimensions;
267 TileDimension = DimToVectorize;
268
269 /* Create an identity map for everything except DimToVectorize and map
270 DimToVectorize to the point loop at the innermost dimension. */
271 for (i = 0; i < ScheduleDimensions; i++)
272 {
c3284718
RS
273 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
274 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
b60cc080
TG
275
276 if (i == DimToVectorize)
c3284718 277 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
b60cc080 278 else
c3284718 279 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
b60cc080 280
c3284718 281 TilingMap = isl_map_add_constraint (TilingMap, c);
b60cc080
TG
282 }
283
284 /* it % 'VectorWidth' = 0 */
c3284718
RS
285 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
286 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
287 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
288 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
b47595f7
MN
289
290 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
291 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
c3284718
RS
292 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
293 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
b60cc080
TG
294
295 /* it <= ip */
c3284718
RS
296 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
297 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
298 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
299 TilingMap = isl_map_add_constraint (TilingMap, c);
b60cc080
TG
300
301 /* ip <= it + ('VectorWidth' - 1) */
c3284718
RS
302 c = isl_inequality_alloc (LocalSpace);
303 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
304 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
305 isl_constraint_set_constant_si (c, VectorWidth - 1);
306 TilingMap = isl_map_add_constraint (TilingMap, c);
b60cc080 307
c3284718 308 isl_map_dump (TilingMap);
b60cc080
TG
309
310 return TilingMap;
311}
312
313static bool EnablePollyVector = false;
314
315/* getScheduleForBandList - Get the scheduling map for a list of bands.
316
317 We walk recursively the forest of bands to combine the schedules of the
318 individual bands to the overall schedule. In case tiling is requested,
319 the individual bands are tiled. */
320static isl_union_map *
c3284718 321getScheduleForBandList (isl_band_list *BandList)
b60cc080
TG
322{
323 int NumBands, i;
324 isl_union_map *Schedule;
325 isl_ctx *ctx;
326
c3284718
RS
327 ctx = isl_band_list_get_ctx (BandList);
328 NumBands = isl_band_list_n_band (BandList);
329 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
b60cc080
TG
330
331 for (i = 0; i < NumBands; i++)
332 {
333 isl_band *Band;
334 isl_union_map *PartialSchedule;
335 int ScheduleDimensions;
336 isl_space *Space;
337
c3284718
RS
338 Band = isl_band_list_get_band (BandList, i);
339 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
340 Space = isl_union_map_get_space (PartialSchedule);
b60cc080 341
c3284718 342 if (isl_band_has_children (Band))
b60cc080
TG
343 {
344 isl_band_list *Children;
345 isl_union_map *SuffixSchedule;
346
c3284718
RS
347 Children = isl_band_get_children (Band);
348 SuffixSchedule = getScheduleForBandList (Children);
349 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
350 SuffixSchedule);
351 isl_band_list_free (Children);
b60cc080
TG
352 }
353 else if (EnablePollyVector)
354 {
355 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
356 {
c3284718 357 if (isl_band_member_is_zero_distance (Band, i))
b60cc080
TG
358 {
359 isl_map *TileMap;
360 isl_union_map *TileUMap;
361
c3284718
RS
362 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
363 TileUMap = isl_union_map_from_map (TileMap);
364 TileUMap = isl_union_map_align_params
365 (TileUMap, isl_space_copy (Space));
366 PartialSchedule = isl_union_map_apply_range
367 (PartialSchedule, TileUMap);
b60cc080
TG
368 break;
369 }
370 }
371 }
372
c3284718 373 Schedule = isl_union_map_union (Schedule, PartialSchedule);
b60cc080 374
c3284718
RS
375 isl_band_free (Band);
376 isl_space_free (Space);
b60cc080
TG
377 }
378
379 return Schedule;
380}
381
382static isl_union_map *
c3284718 383getScheduleMap (isl_schedule *Schedule)
b60cc080 384{
c3284718
RS
385 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
386 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
387 isl_band_list_free (BandList);
b60cc080
TG
388 return ScheduleMap;
389}
390
391static int
c3284718 392getSingleMap (__isl_take isl_map *map, void *user)
b60cc080
TG
393{
394 isl_map **singleMap = (isl_map **) user;
395 *singleMap = map;
396
397 return 0;
398}
399
400static void
401apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
402{
403 int i;
404 poly_bb_p pbb;
405
9771b263 406 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
b60cc080
TG
407 {
408 isl_set *domain = isl_set_copy (pbb->domain);
409 isl_union_map *stmtBand;
410 isl_map *stmtSchedule;
411
c3284718
RS
412 stmtBand = isl_union_map_intersect_domain
413 (isl_union_map_copy (schedule_map),
414 isl_union_set_from_set (domain));
415 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
416 isl_map_free (pbb->transformed);
b60cc080 417 pbb->transformed = stmtSchedule;
c3284718 418 isl_union_map_free (stmtBand);
b60cc080
TG
419 }
420}
421
422static const int CONSTANT_BOUND = 20;
423
424bool
425optimize_isl (scop_p scop)
426{
427
428 isl_schedule *schedule;
429 isl_union_set *domain;
430 isl_union_map *validity, *proximity, *dependences;
431 isl_union_map *schedule_map;
432
433 domain = scop_get_domains (scop);
434 dependences = scop_get_dependences (scop);
c3284718
RS
435 dependences = isl_union_map_gist_domain (dependences,
436 isl_union_set_copy (domain));
437 dependences = isl_union_map_gist_range (dependences,
438 isl_union_set_copy (domain));
b60cc080
TG
439 validity = dependences;
440
441 proximity = isl_union_map_copy (validity);
442
c3284718
RS
443 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
444 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
445 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
446 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
b60cc080 447 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
c3284718 448 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
b60cc080
TG
449
450 if (!schedule)
451 return false;
452
453 schedule_map = getScheduleMap (schedule);
454
455 apply_schedule_map_to_scop (scop, schedule_map);
456
457 isl_schedule_free (schedule);
458 isl_union_map_free (schedule_map);
459
460 return true;
461}
462
463#endif