]>
Commit | Line | Data |
---|---|---|
c6bb733d | 1 | /* Heuristics and transform for loop blocking and strip mining on |
2 | polyhedral representation. | |
3 | ||
71e45bc2 | 4 | Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. |
c6bb733d | 5 | Contributed by Sebastian Pop <sebastian.pop@amd.com> and |
6 | Pranav Garg <pranav.garg2107@gmail.com>. | |
7 | ||
8 | This file is part of GCC. | |
9 | ||
10 | GCC is free software; you can redistribute it and/or modify | |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | GCC is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GCC; see the file COPYING3. If not see | |
22 | <http://www.gnu.org/licenses/>. */ | |
87e20041 | 23 | |
c6bb733d | 24 | #include "config.h" |
87e20041 | 25 | |
26 | #ifdef HAVE_cloog | |
27 | #include <isl/set.h> | |
28 | #include <isl/map.h> | |
29 | #include <isl/union_map.h> | |
30 | #include <isl/constraint.h> | |
31 | #include <cloog/cloog.h> | |
32 | #include <cloog/isl/domain.h> | |
33 | #endif | |
34 | ||
c6bb733d | 35 | #include "system.h" |
36 | #include "coretypes.h" | |
c6bb733d | 37 | #include "tree-flow.h" |
b9ed1410 | 38 | #include "dumpfile.h" |
c6bb733d | 39 | #include "cfgloop.h" |
40 | #include "tree-chrec.h" | |
41 | #include "tree-data-ref.h" | |
1e5b7b1f | 42 | #include "sese.h" |
c6bb733d | 43 | |
44 | #ifdef HAVE_cloog | |
c6bb733d | 45 | #include "graphite-poly.h" |
46 | ||
47 | ||
ee0d08ad | 48 | /* Strip mines with a factor STRIDE the scattering (time) dimension |
49 | around PBB at depth TIME_DEPTH. | |
50 | ||
51 | The following example comes from the wiki page: | |
c6bb733d | 52 | http://gcc.gnu.org/wiki/Graphite/Strip_mine |
53 | ||
54 | The strip mine of a loop with a tile of 64 can be obtained with a | |
55 | scattering function as follows: | |
56 | ||
57 | $ cat ./albert_strip_mine.cloog | |
58 | # language: C | |
59 | c | |
60 | ||
61 | # parameter {n | n >= 0} | |
62 | 1 3 | |
63 | # n 1 | |
64 | 1 1 0 | |
65 | 1 | |
66 | n | |
67 | ||
68 | 1 # Number of statements: | |
69 | ||
70 | 1 | |
71 | # {i | 0 <= i <= n} | |
72 | 2 4 | |
73 | # i n 1 | |
74 | 1 1 0 0 | |
75 | 1 -1 1 0 | |
76 | ||
77 | 0 0 0 | |
78 | 1 | |
79 | i | |
80 | ||
81 | 1 # Scattering functions | |
82 | ||
83 | 3 6 | |
84 | # NEW OLD i n 1 | |
85 | 1 -64 0 1 0 0 | |
86 | 1 64 0 -1 0 63 | |
87 | 0 0 1 -1 0 0 | |
88 | ||
89 | 1 | |
90 | NEW OLD | |
91 | ||
92 | #the output of CLooG is like this: | |
93 | #$ cloog ./albert_strip_mine.cloog | |
94 | # for (NEW=0;NEW<=floord(n,64);NEW++) { | |
95 | # for (OLD=max(64*NEW,0);OLD<=min(64*NEW+63,n);OLD++) { | |
96 | # S1(i = OLD) ; | |
97 | # } | |
98 | # } | |
99 | */ | |
100 | ||
a6ccb398 | 101 | static void |
ee0d08ad | 102 | pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride) |
c6bb733d | 103 | { |
87e20041 | 104 | isl_space *d; |
105 | isl_constraint *c; | |
106 | int iter, strip; | |
ee0d08ad | 107 | /* STRIP is the dimension that iterates with stride STRIDE. */ |
108 | /* ITER is the dimension that enumerates single iterations inside | |
109 | one strip that has at most STRIDE iterations. */ | |
110 | strip = time_depth; | |
111 | iter = strip + 2; | |
c6bb733d | 112 | |
87e20041 | 113 | pbb->transformed = isl_map_insert_dims (pbb->transformed, isl_dim_out, |
114 | strip, 2); | |
c6bb733d | 115 | |
116 | /* Lower bound of the striped loop. */ | |
87e20041 | 117 | d = isl_map_get_space (pbb->transformed); |
118 | c = isl_inequality_alloc (isl_local_space_from_space (d)); | |
119 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, -stride); | |
120 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, 1); | |
121 | pbb->transformed = isl_map_add_constraint (pbb->transformed, c); | |
c6bb733d | 122 | |
123 | /* Upper bound of the striped loop. */ | |
87e20041 | 124 | d = isl_map_get_space (pbb->transformed); |
125 | c = isl_inequality_alloc (isl_local_space_from_space (d)); | |
126 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, stride); | |
127 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, -1); | |
128 | c = isl_constraint_set_constant_si (c, stride - 1); | |
129 | pbb->transformed = isl_map_add_constraint (pbb->transformed, c); | |
c6bb733d | 130 | |
ee0d08ad | 131 | /* Static scheduling for ITER level. |
132 | This is mandatory to keep the 2d + 1 canonical scheduling format. */ | |
87e20041 | 133 | d = isl_map_get_space (pbb->transformed); |
134 | c = isl_equality_alloc (isl_local_space_from_space (d)); | |
135 | c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip + 1, 1); | |
136 | pbb->transformed = isl_map_add_constraint (pbb->transformed, c); | |
c6bb733d | 137 | } |
138 | ||
63835205 | 139 | /* Returns true when strip mining with STRIDE of the loop LST is |
140 | profitable. */ | |
c6bb733d | 141 | |
142 | static bool | |
63835205 | 143 | lst_strip_mine_profitable_p (lst_p lst, int stride) |
c6bb733d | 144 | { |
0ef84e3b | 145 | mpz_t niter, strip_stride; |
c6bb733d | 146 | bool res; |
147 | ||
63835205 | 148 | gcc_assert (LST_LOOP_P (lst)); |
2d6fe479 | 149 | mpz_init (strip_stride); |
150 | mpz_init (niter); | |
63835205 | 151 | |
2d6fe479 | 152 | mpz_set_si (strip_stride, stride); |
63835205 | 153 | lst_niter_for_loop (lst, niter); |
2d6fe479 | 154 | res = (mpz_cmp (niter, strip_stride) > 0); |
63835205 | 155 | |
2d6fe479 | 156 | mpz_clear (strip_stride); |
157 | mpz_clear (niter); | |
c6bb733d | 158 | return res; |
159 | } | |
160 | ||
a6ccb398 | 161 | /* Strip-mines all the loops of LST with STRIDE. Return the number of |
162 | loops strip-mined. */ | |
c6bb733d | 163 | |
a6ccb398 | 164 | static int |
68f6634f | 165 | lst_do_strip_mine_loop (lst_p lst, int depth, int stride) |
c6bb733d | 166 | { |
0befefcc | 167 | int i; |
168 | lst_p l; | |
0befefcc | 169 | poly_bb_p pbb; |
c6bb733d | 170 | |
0befefcc | 171 | if (!lst) |
a6ccb398 | 172 | return 0; |
c6bb733d | 173 | |
0befefcc | 174 | if (LST_LOOP_P (lst)) |
175 | { | |
a6ccb398 | 176 | int res = 0; |
0befefcc | 177 | |
f1f41a6c | 178 | FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l) |
a6ccb398 | 179 | res += lst_do_strip_mine_loop (l, depth, stride); |
0befefcc | 180 | |
181 | return res; | |
182 | } | |
183 | ||
184 | pbb = LST_PBB (lst); | |
a6ccb398 | 185 | pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride); |
186 | return 1; | |
c6bb733d | 187 | } |
188 | ||
68f6634f | 189 | /* Strip-mines all the loops of LST with STRIDE. When STRIDE is zero, |
a6ccb398 | 190 | read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE. Return the |
191 | number of strip-mined loops. | |
68f6634f | 192 | |
193 | Strip mining transforms a loop | |
194 | ||
195 | | for (i = 0; i < N; i++) | |
196 | | S (i); | |
197 | ||
198 | into the following loop nest: | |
199 | ||
200 | | for (k = 0; k < N; k += STRIDE) | |
201 | | for (j = 0; j < STRIDE; j++) | |
202 | | S (i = k + j); | |
203 | */ | |
0befefcc | 204 | |
a6ccb398 | 205 | static int |
68f6634f | 206 | lst_do_strip_mine (lst_p lst, int stride) |
0befefcc | 207 | { |
208 | int i; | |
209 | lst_p l; | |
a6ccb398 | 210 | int res = 0; |
9a0158f5 | 211 | int depth; |
0befefcc | 212 | |
68f6634f | 213 | if (!stride) |
214 | stride = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE); | |
215 | ||
0befefcc | 216 | if (!lst |
217 | || !LST_LOOP_P (lst)) | |
218 | return false; | |
219 | ||
f1f41a6c | 220 | FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l) |
a6ccb398 | 221 | res += lst_do_strip_mine (l, stride); |
0befefcc | 222 | |
9a0158f5 | 223 | depth = lst_depth (lst); |
224 | if (depth >= 0 | |
63835205 | 225 | && lst_strip_mine_profitable_p (lst, stride)) |
0befefcc | 226 | { |
a6ccb398 | 227 | res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride); |
0befefcc | 228 | lst_add_loop_under_loop (lst); |
229 | } | |
230 | ||
231 | return res; | |
232 | } | |
233 | ||
a6ccb398 | 234 | /* Strip mines all the loops in SCOP. Returns the number of |
235 | strip-mined loops. */ | |
c6bb733d | 236 | |
a6ccb398 | 237 | int |
68f6634f | 238 | scop_do_strip_mine (scop_p scop, int stride) |
c6bb733d | 239 | { |
68f6634f | 240 | return lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), stride); |
c6bb733d | 241 | } |
242 | ||
64d8f27a | 243 | /* Loop blocks all the loops in SCOP. Returns true when we manage to |
244 | block some loops. */ | |
245 | ||
246 | bool | |
247 | scop_do_block (scop_p scop) | |
248 | { | |
64d8f27a | 249 | store_scattering (scop); |
250 | ||
a6ccb398 | 251 | /* If we don't strip mine at least two loops, or not interchange |
252 | loops, the strip mine alone will not be profitable, and the | |
253 | transform is not a loop blocking: so revert the transform. */ | |
254 | if (lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), 0) < 2 | |
255 | || scop_do_interchange (scop) == 0) | |
64d8f27a | 256 | { |
257 | restore_scattering (scop); | |
258 | return false; | |
259 | } | |
a6ccb398 | 260 | |
261 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
64d8f27a | 262 | fprintf (dump_file, "SCoP will be loop blocked.\n"); |
263 | ||
a6ccb398 | 264 | return true; |
64d8f27a | 265 | } |
266 | ||
c6bb733d | 267 | #endif |