]>
Commit | Line | Data |
---|---|---|
599eabdb | 1 | /* Linear Loop transforms |
ad616de1 | 2 | Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. |
599eabdb DB |
3 | Contributed by Daniel Berlin <dberlin@dberlin.org>. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "tm.h" | |
27 | #include "errors.h" | |
28 | #include "ggc.h" | |
29 | #include "tree.h" | |
30 | #include "target.h" | |
31 | ||
32 | #include "rtl.h" | |
33 | #include "basic-block.h" | |
34 | #include "diagnostic.h" | |
35 | #include "tree-flow.h" | |
36 | #include "tree-dump.h" | |
37 | #include "timevar.h" | |
38 | #include "cfgloop.h" | |
39 | #include "expr.h" | |
40 | #include "optabs.h" | |
41 | #include "tree-chrec.h" | |
42 | #include "tree-data-ref.h" | |
43 | #include "tree-scalar-evolution.h" | |
44 | #include "tree-pass.h" | |
45 | #include "varray.h" | |
46 | #include "lambda.h" | |
47 | ||
48 | /* Linear loop transforms include any composition of interchange, | |
49 | scaling, skewing, and reversal. They are used to change the | |
50 | iteration order of loop nests in order to optimize data locality of | |
51 | traversals, or remove dependences that prevent | |
52 | parallelization/vectorization/etc. | |
53 | ||
54 | TODO: Determine reuse vectors/matrix and use it to determine optimal | |
55 | transform matrix for locality purposes. | |
56 | TODO: Completion of partial transforms. */ | |
57 | ||
1f24dd47 DB |
58 | /* Gather statistics for loop interchange. LOOP is the loop being |
59 | considered. The first loop in the considered loop nest is | |
60 | FIRST_LOOP, and consequently, the index of the considered loop is | |
61 | obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH | |
f67d92e9 DB |
62 | |
63 | Initializes: | |
64 | - DEPENDENCE_STEPS the sum of all the data dependence distances | |
1f24dd47 | 65 | carried by loop LOOP, |
f67d92e9 DB |
66 | |
67 | - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations | |
1f24dd47 | 68 | for which the loop LOOP is not carrying any dependence, |
f67d92e9 | 69 | |
1f24dd47 | 70 | - ACCESS_STRIDES the sum of all the strides in LOOP. |
f67d92e9 DB |
71 | |
72 | Example: for the following loop, | |
73 | ||
74 | | loop_1 runs 1335 times | |
75 | | loop_2 runs 1335 times | |
76 | | A[{{0, +, 1}_1, +, 1335}_2] | |
77 | | B[{{0, +, 1}_1, +, 1335}_2] | |
78 | | endloop_2 | |
79 | | A[{0, +, 1336}_1] | |
80 | | endloop_1 | |
81 | ||
82 | gather_interchange_stats (in loop_1) will return | |
83 | DEPENDENCE_STEPS = 3002 | |
84 | NB_DEPS_NOT_CARRIED_BY_LOOP = 5 | |
85 | ACCESS_STRIDES = 10694 | |
86 | ||
87 | gather_interchange_stats (in loop_2) will return | |
88 | DEPENDENCE_STEPS = 3000 | |
89 | NB_DEPS_NOT_CARRIED_BY_LOOP = 7 | |
90 | ACCESS_STRIDES = 8010 | |
c4bda9f0 | 91 | */ |
599eabdb DB |
92 | |
93 | static void | |
94 | gather_interchange_stats (varray_type dependence_relations, | |
f67d92e9 | 95 | varray_type datarefs, |
1f24dd47 DB |
96 | struct loop *loop, |
97 | struct loop *first_loop, | |
f67d92e9 DB |
98 | unsigned int *dependence_steps, |
99 | unsigned int *nb_deps_not_carried_by_loop, | |
100 | unsigned int *access_strides) | |
599eabdb DB |
101 | { |
102 | unsigned int i; | |
103 | ||
f67d92e9 | 104 | *dependence_steps = 0; |
599eabdb | 105 | *nb_deps_not_carried_by_loop = 0; |
f67d92e9 DB |
106 | *access_strides = 0; |
107 | ||
599eabdb DB |
108 | for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) |
109 | { | |
110 | int dist; | |
111 | struct data_dependence_relation *ddr = | |
112 | (struct data_dependence_relation *) | |
113 | VARRAY_GENERIC_PTR (dependence_relations, i); | |
114 | ||
c4bda9f0 DB |
115 | /* If we don't know anything about this dependence, or the distance |
116 | vector is NULL, or there is no dependence, then there is no reuse of | |
117 | data. */ | |
f67d92e9 | 118 | |
c4bda9f0 DB |
119 | if (DDR_DIST_VECT (ddr) == NULL |
120 | || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know | |
121 | || DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
122 | continue; | |
123 | ||
599eabdb | 124 | |
c4bda9f0 | 125 | |
1f24dd47 | 126 | dist = DDR_DIST_VECT (ddr)[loop->depth - first_loop->depth]; |
599eabdb | 127 | if (dist == 0) |
f67d92e9 | 128 | (*nb_deps_not_carried_by_loop) += 1; |
599eabdb | 129 | else if (dist < 0) |
f67d92e9 | 130 | (*dependence_steps) += -dist; |
599eabdb | 131 | else |
f67d92e9 DB |
132 | (*dependence_steps) += dist; |
133 | } | |
134 | ||
135 | /* Compute the access strides. */ | |
136 | for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) | |
137 | { | |
138 | unsigned int it; | |
139 | struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); | |
140 | tree stmt = DR_STMT (dr); | |
141 | struct loop *stmt_loop = loop_containing_stmt (stmt); | |
1f24dd47 DB |
142 | struct loop *inner_loop = first_loop->inner; |
143 | ||
144 | if (inner_loop != stmt_loop | |
145 | && !flow_loop_nested_p (inner_loop, stmt_loop)) | |
f67d92e9 | 146 | continue; |
f67d92e9 DB |
147 | for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++) |
148 | { | |
149 | tree chrec = DR_ACCESS_FN (dr, it); | |
150 | tree tstride = evolution_part_in_loop_num | |
1f24dd47 | 151 | (chrec, loop->num); |
f67d92e9 DB |
152 | |
153 | if (tstride == NULL_TREE | |
154 | || TREE_CODE (tstride) != INTEGER_CST) | |
155 | continue; | |
156 | ||
157 | (*access_strides) += int_cst_value (tstride); | |
158 | } | |
599eabdb DB |
159 | } |
160 | } | |
161 | ||
c4bda9f0 DB |
162 | /* Attempt to apply interchange transformations to TRANS to maximize the |
163 | spatial and temporal locality of the loop. | |
599eabdb | 164 | Returns the new transform matrix. The smaller the reuse vector |
f67d92e9 DB |
165 | distances in the inner loops, the fewer the cache misses. |
166 | FIRST_LOOP is the loop->num of the first loop in the analyzed loop | |
167 | nest. */ | |
168 | ||
599eabdb DB |
169 | |
170 | static lambda_trans_matrix | |
171 | try_interchange_loops (lambda_trans_matrix trans, | |
172 | unsigned int depth, | |
f67d92e9 DB |
173 | varray_type dependence_relations, |
174 | varray_type datarefs, | |
1f24dd47 | 175 | struct loop *first_loop) |
599eabdb | 176 | { |
1f24dd47 DB |
177 | struct loop *loop_i; |
178 | struct loop *loop_j; | |
f67d92e9 DB |
179 | unsigned int dependence_steps_i, dependence_steps_j; |
180 | unsigned int access_strides_i, access_strides_j; | |
599eabdb DB |
181 | unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; |
182 | struct data_dependence_relation *ddr; | |
183 | ||
184 | /* When there is an unknown relation in the dependence_relations, we | |
185 | know that it is no worth looking at this loop nest: give up. */ | |
186 | ddr = (struct data_dependence_relation *) | |
187 | VARRAY_GENERIC_PTR (dependence_relations, 0); | |
188 | if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
189 | return trans; | |
190 | ||
191 | /* LOOP_I is always the outer loop. */ | |
1f24dd47 DB |
192 | for (loop_j = first_loop->inner; |
193 | loop_j; | |
194 | loop_j = loop_j->inner) | |
195 | for (loop_i = first_loop; | |
196 | loop_i->depth < loop_j->depth; | |
197 | loop_i = loop_i->inner) | |
599eabdb | 198 | { |
f67d92e9 DB |
199 | gather_interchange_stats (dependence_relations, datarefs, |
200 | loop_i, first_loop, | |
201 | &dependence_steps_i, | |
202 | &nb_deps_not_carried_by_i, | |
203 | &access_strides_i); | |
204 | gather_interchange_stats (dependence_relations, datarefs, | |
205 | loop_j, first_loop, | |
206 | &dependence_steps_j, | |
207 | &nb_deps_not_carried_by_j, | |
208 | &access_strides_j); | |
599eabdb DB |
209 | |
210 | /* Heuristics for loop interchange profitability: | |
f67d92e9 DB |
211 | |
212 | 1. (spatial locality) Inner loops should have smallest | |
213 | dependence steps. | |
214 | ||
215 | 2. (spatial locality) Inner loops should contain more | |
216 | dependence relations not carried by the loop. | |
217 | ||
218 | 3. (temporal locality) Inner loops should have smallest | |
219 | array access strides. | |
599eabdb | 220 | */ |
f67d92e9 DB |
221 | if (dependence_steps_i < dependence_steps_j |
222 | || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j | |
223 | || access_strides_i < access_strides_j) | |
599eabdb | 224 | { |
1f24dd47 DB |
225 | lambda_matrix_row_exchange (LTM_MATRIX (trans), |
226 | loop_i->depth - first_loop->depth, | |
227 | loop_j->depth - first_loop->depth); | |
599eabdb | 228 | /* Validate the resulting matrix. When the transformation |
f67d92e9 | 229 | is not valid, reverse to the previous transformation. */ |
599eabdb | 230 | if (!lambda_transform_legal_p (trans, depth, dependence_relations)) |
1f24dd47 DB |
231 | lambda_matrix_row_exchange (LTM_MATRIX (trans), |
232 | loop_i->depth - first_loop->depth, | |
233 | loop_j->depth - first_loop->depth); | |
599eabdb DB |
234 | } |
235 | } | |
f67d92e9 | 236 | |
599eabdb DB |
237 | return trans; |
238 | } | |
239 | ||
240 | /* Perform a set of linear transforms on LOOPS. */ | |
241 | ||
242 | void | |
243 | linear_transform_loops (struct loops *loops) | |
244 | { | |
245 | unsigned int i; | |
d4e6fecb NS |
246 | VEC(tree,gc) *oldivs = NULL; /* FIXME:These should really be on the |
247 | heap. (nathan 2005/04/15)*/ | |
248 | VEC(tree,gc) *invariants = NULL; /* FIXME:Likewise. */ | |
c4bda9f0 | 249 | |
599eabdb DB |
250 | for (i = 1; i < loops->num; i++) |
251 | { | |
252 | unsigned int depth = 0; | |
253 | varray_type datarefs; | |
254 | varray_type dependence_relations; | |
255 | struct loop *loop_nest = loops->parray[i]; | |
256 | struct loop *temp; | |
599eabdb DB |
257 | lambda_loopnest before, after; |
258 | lambda_trans_matrix trans; | |
259 | bool problem = false; | |
f67d92e9 | 260 | bool need_perfect_nest = false; |
599eabdb DB |
261 | /* If it's not a loop nest, we don't want it. |
262 | We also don't handle sibling loops properly, | |
263 | which are loops of the following form: | |
264 | for (i = 0; i < 50; i++) | |
265 | { | |
266 | for (j = 0; j < 50; j++) | |
267 | { | |
268 | ... | |
269 | } | |
270 | for (j = 0; j < 50; j++) | |
271 | { | |
272 | ... | |
273 | } | |
274 | } */ | |
2b271002 | 275 | if (!loop_nest || !loop_nest->inner) |
599eabdb | 276 | continue; |
d4e6fecb NS |
277 | VEC_truncate (tree, oldivs, 0); |
278 | VEC_truncate (tree, invariants, 0); | |
f67d92e9 DB |
279 | depth = 1; |
280 | for (temp = loop_nest->inner; temp; temp = temp->inner) | |
599eabdb | 281 | { |
599eabdb | 282 | /* If we have a sibling loop or multiple exit edges, jump ship. */ |
70388d94 | 283 | if (temp->next || !temp->single_exit) |
599eabdb DB |
284 | { |
285 | problem = true; | |
286 | break; | |
287 | } | |
288 | depth ++; | |
289 | } | |
290 | if (problem) | |
291 | continue; | |
292 | ||
293 | /* Analyze data references and dependence relations using scev. */ | |
294 | ||
295 | VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); | |
296 | VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, | |
297 | "dependence_relations"); | |
298 | ||
299 | ||
300 | compute_data_dependences_for_loop (depth, loop_nest, | |
301 | &datarefs, &dependence_relations); | |
302 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
303 | { | |
304 | unsigned int j; | |
305 | for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) | |
306 | { | |
307 | struct data_dependence_relation *ddr = | |
308 | (struct data_dependence_relation *) | |
309 | VARRAY_GENERIC_PTR (dependence_relations, j); | |
310 | ||
311 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
312 | { | |
313 | fprintf (dump_file, "DISTANCE_V ("); | |
314 | print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), | |
464f49d8 | 315 | DDR_SIZE_VECT (ddr)); |
599eabdb DB |
316 | fprintf (dump_file, ")\n"); |
317 | fprintf (dump_file, "DIRECTION_V ("); | |
318 | print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), | |
464f49d8 | 319 | DDR_SIZE_VECT (ddr)); |
599eabdb DB |
320 | fprintf (dump_file, ")\n"); |
321 | } | |
322 | } | |
323 | fprintf (dump_file, "\n\n"); | |
324 | } | |
325 | /* Build the transformation matrix. */ | |
326 | trans = lambda_trans_matrix_new (depth, depth); | |
327 | lambda_matrix_id (LTM_MATRIX (trans), depth); | |
464f49d8 | 328 | |
f67d92e9 | 329 | trans = try_interchange_loops (trans, depth, dependence_relations, |
1f24dd47 | 330 | datarefs, loop_nest); |
f67d92e9 DB |
331 | |
332 | if (lambda_trans_matrix_id_p (trans)) | |
333 | { | |
334 | if (dump_file) | |
335 | fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); | |
336 | continue; | |
337 | } | |
599eabdb DB |
338 | |
339 | /* Check whether the transformation is legal. */ | |
340 | if (!lambda_transform_legal_p (trans, depth, dependence_relations)) | |
341 | { | |
342 | if (dump_file) | |
343 | fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); | |
344 | continue; | |
345 | } | |
f67d92e9 DB |
346 | if (!perfect_nest_p (loop_nest)) |
347 | need_perfect_nest = true; | |
348 | before = gcc_loopnest_to_lambda_loopnest (loops, | |
349 | loop_nest, &oldivs, | |
350 | &invariants, | |
351 | need_perfect_nest); | |
599eabdb DB |
352 | if (!before) |
353 | continue; | |
354 | ||
355 | if (dump_file) | |
356 | { | |
357 | fprintf (dump_file, "Before:\n"); | |
358 | print_lambda_loopnest (dump_file, before, 'i'); | |
359 | } | |
360 | ||
361 | after = lambda_loopnest_transform (before, trans); | |
362 | if (dump_file) | |
363 | { | |
364 | fprintf (dump_file, "After:\n"); | |
365 | print_lambda_loopnest (dump_file, after, 'u'); | |
366 | } | |
367 | lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, | |
368 | after, trans); | |
464f49d8 DB |
369 | if (dump_file) |
370 | fprintf (dump_file, "Successfully transformed loop.\n"); | |
599eabdb DB |
371 | free_dependence_relations (dependence_relations); |
372 | free_data_refs (datarefs); | |
373 | } | |
d4e6fecb NS |
374 | VEC_free (tree, gc, oldivs); |
375 | VEC_free (tree, gc, invariants); | |
464f49d8 | 376 | scev_reset (); |
0bca51f0 | 377 | update_ssa (TODO_update_ssa); |
2b271002 | 378 | rewrite_into_loop_closed_ssa (NULL); |
599eabdb | 379 | } |