]>
Commit | Line | Data |
---|---|---|
629b3d75 MJ |
1 | /* Lowering and expansion of OpenMP directives for HSA GPU agents. |
2 | ||
85ec4feb | 3 | Copyright (C) 2013-2018 Free Software Foundation, Inc. |
629b3d75 MJ |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "tree-pass.h" | |
28 | #include "ssa.h" | |
29 | #include "cgraph.h" | |
30 | #include "pretty-print.h" | |
31 | #include "fold-const.h" | |
32 | #include "gimplify.h" | |
33 | #include "gimple-iterator.h" | |
34 | #include "gimple-walk.h" | |
35 | #include "tree-inline.h" | |
36 | #include "langhooks.h" | |
37 | #include "omp-general.h" | |
38 | #include "omp-low.h" | |
39 | #include "omp-grid.h" | |
40 | #include "gimple-pretty-print.h" | |
41 | ||
42 | /* Return the lastprivate predicate for a given gridified loop described by | |
43 | FD). */ | |
44 | ||
45 | tree | |
46 | omp_grid_lastprivate_predicate (struct omp_for_data *fd) | |
47 | { | |
48 | /* When dealing with a gridified loop, we need to check up to three collapsed | |
49 | iteration variables but they are not actually captured in this fd. | |
50 | Fortunately, we can easily rely on HSA builtins to get this | |
01914336 | 51 | information. */ |
629b3d75 MJ |
52 | |
53 | tree id, size; | |
54 | if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP | |
55 | && gimple_omp_for_grid_intra_group (fd->for_stmt)) | |
56 | { | |
57 | id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); | |
58 | size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); | |
59 | } | |
60 | else | |
61 | { | |
62 | id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); | |
63 | size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); | |
64 | } | |
65 | tree cond = NULL; | |
66 | for (int dim = 0; dim < fd->collapse; dim++) | |
67 | { | |
68 | tree dim_tree = build_int_cstu (unsigned_type_node, dim); | |
69 | tree u1 = build_int_cstu (unsigned_type_node, 1); | |
70 | tree c2 | |
71 | = build2 (EQ_EXPR, boolean_type_node, | |
72 | build2 (PLUS_EXPR, unsigned_type_node, | |
73 | build_call_expr (id, 1, dim_tree), u1), | |
74 | build_call_expr (size, 1, dim_tree)); | |
75 | if (cond) | |
76 | cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); | |
77 | else | |
78 | cond = c2; | |
79 | } | |
80 | return cond; | |
81 | } | |
82 | ||
/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  Indexed
     by dimension; at most three dimensions are supported.  */
  tree group_sizes[3];
};

/* Common prefix of all missed-optimization messages emitted while deciding
   whether a target construct can be gridified.  */

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "
104 | ||
105 | /* Return true if STMT is an assignment of a register-type into a local | |
106 | VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to | |
107 | any of the trees specifying group sizes there. */ | |
108 | ||
109 | static bool | |
110 | grid_safe_assignment_p (gimple *stmt, grid_prop *grid) | |
111 | { | |
112 | gassign *assign = dyn_cast <gassign *> (stmt); | |
113 | if (!assign) | |
114 | return false; | |
115 | if (gimple_clobber_p (assign)) | |
116 | return true; | |
117 | tree lhs = gimple_assign_lhs (assign); | |
118 | if (!VAR_P (lhs) | |
119 | || !is_gimple_reg_type (TREE_TYPE (lhs)) | |
120 | || is_global_var (lhs)) | |
121 | return false; | |
122 | if (grid) | |
123 | for (unsigned i = 0; i < grid->collapse; i++) | |
124 | if (lhs == grid->group_sizes[i]) | |
125 | return false; | |
126 | return true; | |
127 | } | |
128 | ||
129 | /* Return true if all statements in SEQ are assignments to local register-type | |
130 | variables that do not hold group size information. */ | |
131 | ||
132 | static bool | |
133 | grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) | |
134 | { | |
135 | if (!seq) | |
136 | return true; | |
137 | ||
138 | gimple_stmt_iterator gsi; | |
139 | for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) | |
140 | if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) | |
141 | return false; | |
142 | return true; | |
143 | } | |
144 | ||
/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement is encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  NAME is only
   used in missed-optimization diagnostics.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
					  const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Harmless local assignments are simply skipped over.  */
      if (grid_safe_assignment_p (stmt, grid))
	continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Look through binds recursively; *RET accumulates across
	     nesting levels.  */
	  gimple_seq bind_body = gimple_bind_body (bind);
	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
							 ret))
	    return false;
	}
      else if (is_gimple_omp (stmt))
	{
	  /* A second OMP statement means the pattern cannot match.  */
	  if (*ret)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "%s construct "
				   "contains multiple OpenMP constructs\n",
				   name);
		  dump_printf_loc (MSG_NOTE, *ret,
				   "The first OpenMP construct within "
				   "a parallel\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "The second OpenMP construct within "
				   "a parallel\n");
		}
	      return false;
	    }
	  *ret = stmt;
	}
      else
	{
	  /* Anything else is too complex to analyze for gridification.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "%s construct contains "
			       "a complex statement\n", name);
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
207 | ||
/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
					const char *name)
{
  /* An empty body cannot contain the required OMP construct.  */
  if (!seq)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
			 name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      /* The scan succeeded but found no OMP statement at all, which is also
	 a failure for our purposes.  */
      if (!ret && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
			 " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}
241 | ||
/* Walker function looking for statements there is no point gridifying (and for
   noreturn function calls which we cannot do).  Return non-NULL if such a
   function is found.  Used with walk_gimple_seq; the offending statement is
   stored in WI->info for diagnostics.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
				   bool *handled_ops_p,
				   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      /* Noreturn calls cannot be expressed in a gridified kernel.  */
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
	{
	  *handled_ops_p = true;
	  wi->info = stmt;
	  return error_mark_node;
	}
      break;

      /* We may reduce the following list if we find a way to implement the
	 clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}
282 | ||
/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostics and return false,
   otherwise return true.  TLOC is the location of the enclosing target
   construct, used for optimization-information messages.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_THREADS:
	  /* An explicit thread count conflicts with the grid geometry we
	     would impose.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "because there is "
			       "a num_threads clause of the parallel "
			       "construct\n");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a num_threads clause\n");
	    }
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  /* Reductions are not implemented for gridified kernels.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "a reduction clause "
			       "is present\n ");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a reduction clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}
326 | ||
327 | /* Examine clauses and the body of omp loop statement GFOR and if something | |
328 | prevents gridification, issue a missed-optimization diagnostics and return | |
01914336 | 329 | false, otherwise return true. GRID describes hitherto discovered properties |
629b3d75 MJ |
330 | of the loop that is evaluated for possible gridification. */ |
331 | ||
332 | static bool | |
333 | grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) | |
334 | { | |
335 | if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), | |
336 | grid)) | |
337 | { | |
338 | if (dump_enabled_p ()) | |
339 | { | |
340 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
341 | GRID_MISSED_MSG_PREFIX "the inner loop " | |
342 | "loop bounds computation contains a complex " | |
343 | "statement\n"); | |
4f5b9c80 | 344 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
345 | "Loop construct cannot be analyzed for " |
346 | "gridification\n"); | |
347 | } | |
348 | return false; | |
349 | } | |
350 | ||
351 | tree clauses = gimple_omp_for_clauses (gfor); | |
352 | while (clauses) | |
353 | { | |
354 | switch (OMP_CLAUSE_CODE (clauses)) | |
355 | { | |
356 | case OMP_CLAUSE_SCHEDULE: | |
357 | if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) | |
358 | { | |
359 | if (dump_enabled_p ()) | |
360 | { | |
361 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
362 | GRID_MISSED_MSG_PREFIX "the inner loop " | |
363 | "has a non-automatic schedule clause\n"); | |
4f5b9c80 | 364 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
365 | "Loop construct has a non automatic " |
366 | "schedule clause\n"); | |
367 | } | |
368 | return false; | |
369 | } | |
370 | break; | |
371 | ||
372 | case OMP_CLAUSE_REDUCTION: | |
373 | if (dump_enabled_p ()) | |
374 | { | |
375 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
376 | GRID_MISSED_MSG_PREFIX "a reduction " | |
377 | "clause is present\n "); | |
4f5b9c80 | 378 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
379 | "Loop construct has a reduction schedule " |
380 | "clause\n"); | |
381 | } | |
382 | return false; | |
383 | ||
384 | default: | |
385 | break; | |
386 | } | |
387 | clauses = OMP_CLAUSE_CHAIN (clauses); | |
388 | } | |
389 | struct walk_stmt_info wi; | |
390 | memset (&wi, 0, sizeof (wi)); | |
391 | if (walk_gimple_seq (gimple_omp_body (gfor), | |
392 | grid_find_ungridifiable_statement, | |
393 | NULL, &wi)) | |
394 | { | |
395 | gimple *bad = (gimple *) wi.info; | |
396 | if (dump_enabled_p ()) | |
397 | { | |
398 | if (is_gimple_call (bad)) | |
399 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
400 | GRID_MISSED_MSG_PREFIX "the inner loop contains " | |
401 | "call to a noreturn function\n"); | |
402 | else | |
403 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
404 | GRID_MISSED_MSG_PREFIX "the inner loop contains " | |
405 | "statement %s which cannot be transformed\n", | |
406 | gimple_code_name[(int) gimple_code (bad)]); | |
4f5b9c80 | 407 | dump_printf_loc (MSG_NOTE, bad, |
629b3d75 MJ |
408 | "This statement cannot be analyzed for " |
409 | "gridification\n"); | |
410 | } | |
411 | return false; | |
412 | } | |
413 | return true; | |
414 | } | |
415 | ||
/* Given distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if it
   can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  /* The distribute body must reduce to a single parallel construct with
     gridifiable clauses...  */
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
							 grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  /* ... which must in turn contain a single simple loop construct.  */
  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
						 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
			 "a simple for loop\n");
      return false;
    }
  /* In a combined construct all loops share the collapse factor.  */
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
455 | ||
456 | /* Given an omp loop statement GFOR, return true if it can participate in | |
457 | tiling gridification, i.e. in one where the distribute and parallel for | |
458 | loops do not form a compound statement. GRID describes hitherto discovered | |
01914336 | 459 | properties of the loop that is evaluated for possible gridification. */ |
629b3d75 MJ |
460 | |
461 | static bool | |
462 | grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) | |
463 | { | |
464 | if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) | |
465 | { | |
466 | if (dump_enabled_p ()) | |
467 | { | |
468 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
469 | GRID_MISSED_MSG_PREFIX "an inner loop is not " | |
470 | "a simple for loop\n"); | |
4f5b9c80 | 471 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
472 | "This statement is not a simple for loop\n"); |
473 | } | |
474 | return false; | |
475 | } | |
476 | ||
477 | if (!grid_inner_loop_gridifiable_p (gfor, grid)) | |
478 | return false; | |
479 | ||
480 | if (gimple_omp_for_collapse (gfor) != grid->collapse) | |
481 | { | |
482 | if (dump_enabled_p ()) | |
483 | { | |
484 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
485 | GRID_MISSED_MSG_PREFIX "an inner loop does not " | |
486 | "have use the same collapse clause\n"); | |
4f5b9c80 | 487 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
488 | "Loop construct uses a different collapse clause\n"); |
489 | } | |
490 | return false; | |
491 | } | |
492 | ||
493 | struct omp_for_data fd; | |
494 | struct omp_for_data_loop *loops | |
495 | = (struct omp_for_data_loop *)alloca (grid->collapse | |
496 | * sizeof (struct omp_for_data_loop)); | |
497 | omp_extract_for_data (gfor, &fd, loops); | |
498 | for (unsigned i = 0; i < grid->collapse; i++) | |
499 | { | |
500 | tree itype, type = TREE_TYPE (fd.loops[i].v); | |
501 | if (POINTER_TYPE_P (type)) | |
502 | itype = signed_type_for (type); | |
503 | else | |
504 | itype = type; | |
505 | ||
506 | tree n1 = fold_convert (itype, fd.loops[i].n1); | |
507 | tree n2 = fold_convert (itype, fd.loops[i].n2); | |
508 | tree t = build_int_cst (itype, | |
509 | (fd.loops[i].cond_code == LT_EXPR ? -1 : 1)); | |
510 | t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); | |
511 | t = fold_build2 (PLUS_EXPR, itype, t, n2); | |
512 | t = fold_build2 (MINUS_EXPR, itype, t, n1); | |
513 | if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) | |
514 | t = fold_build2 (TRUNC_DIV_EXPR, itype, | |
515 | fold_build1 (NEGATE_EXPR, itype, t), | |
516 | fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); | |
517 | else | |
518 | t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); | |
519 | ||
520 | if (!operand_equal_p (grid->group_sizes[i], t, 0)) | |
521 | { | |
522 | if (dump_enabled_p ()) | |
523 | { | |
524 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
525 | GRID_MISSED_MSG_PREFIX "the distribute and " | |
526 | "an internal loop do not agree on tile size\n"); | |
4f5b9c80 | 527 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
528 | "Loop construct does not seem to loop over " |
529 | "a tile size\n"); | |
530 | } | |
531 | return false; | |
532 | } | |
533 | } | |
534 | return true; | |
535 | } | |
536 | ||
537 | /* Facing a call to FNDECL in the body of a distribute construct, return true | |
538 | if we can handle it or false if it precludes gridification. */ | |
539 | ||
540 | static bool | |
541 | grid_call_permissible_in_distribute_p (tree fndecl) | |
542 | { | |
543 | if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) | |
544 | return true; | |
545 | ||
546 | const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); | |
547 | if (strstr (name, "omp_") != name) | |
548 | return false; | |
549 | ||
550 | if ((strcmp (name, "omp_get_thread_num") == 0) | |
551 | || (strcmp (name, "omp_get_num_threads") == 0) | |
552 | || (strcmp (name, "omp_get_num_teams") == 0) | |
553 | || (strcmp (name, "omp_get_team_num") == 0) | |
554 | || (strcmp (name, "omp_get_level") == 0) | |
555 | || (strcmp (name, "omp_get_active_level") == 0) | |
556 | || (strcmp (name, "omp_in_parallel") == 0)) | |
557 | return true; | |
558 | ||
559 | return false; | |
560 | } | |
561 | ||
562 | /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body | |
563 | of a distribute construct that is pointed at by GSI, modify it as necessary | |
564 | for gridification. If the statement itself got removed, return true. */ | |
565 | ||
566 | static bool | |
567 | grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) | |
568 | { | |
569 | gimple *stmt = gsi_stmt (*gsi); | |
570 | tree fndecl = gimple_call_fndecl (stmt); | |
571 | gcc_checking_assert (stmt); | |
572 | if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) | |
573 | return false; | |
574 | ||
575 | const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); | |
576 | if ((strcmp (name, "omp_get_thread_num") == 0) | |
577 | || (strcmp (name, "omp_get_level") == 0) | |
578 | || (strcmp (name, "omp_get_active_level") == 0) | |
579 | || (strcmp (name, "omp_in_parallel") == 0)) | |
580 | { | |
581 | tree lhs = gimple_call_lhs (stmt); | |
582 | if (lhs) | |
583 | { | |
584 | gassign *assign | |
585 | = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); | |
586 | gsi_insert_before (gsi, assign, GSI_SAME_STMT); | |
587 | } | |
588 | gsi_remove (gsi, true); | |
589 | return true; | |
590 | } | |
591 | ||
592 | /* The rest of the omp functions can stay as they are, HSA back-end will | |
593 | handle them correctly. */ | |
594 | gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) | |
595 | || (strcmp (name, "omp_get_num_teams") == 0) | |
596 | || (strcmp (name, "omp_get_team_num") == 0)); | |
597 | return false; | |
598 | } | |
599 | ||
/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if seq is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
				  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Simple local assignments and plain control flow are harmless.  */
      if (grid_safe_assignment_p (stmt, grid)
	  || gimple_code (stmt) == GIMPLE_GOTO
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || gimple_code (stmt) == GIMPLE_COND)
	continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into binds without changing nesting state.  */
	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
	{
	  /* try..finally is fine (both parts are checked); try..catch is
	     not supported.  */
	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the distribute "
				   "construct contains a try..catch region\n");
		  dump_printf_loc (MSG_NOTE, try_stmt,
				   "This statement cannot be analyzed for "
				   "tiled gridification\n");
		}
	      return false;
	    }
	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
						 grid, in_parallel))
	    return false;
	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (is_gimple_call (stmt))
	{
	  /* Only a whitelisted set of OpenMP runtime calls (and pure/const
	     calls) is tolerated here.  */
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
	    continue;

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a call\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
	{
	  /* A parallel may appear once; nested parallels cannot be
	     gridified.  */
	  if (in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a parallel "
				   "construct contains another parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This parallel construct is nested in "
				   "another one\n");
		}
	      return false;
	    }
	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
						    grid, true))
	    return false;
	}
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
	{
	  /* Loop constructs are only allowed inside the parallel.  */
	  if (!in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a loop "
				   "construct is not nested within a parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This loop construct is not nested in "
				   "a parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
	    return false;
	}
      else
	{
	  /* Anything else defeats the analysis.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a complex statement\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
726 | ||
727 | /* If TARGET follows a pattern that can be turned into a gridified HSA kernel, | |
728 | return true, otherwise return false. In the case of success, also fill in | |
729 | GRID with information describing the kernel grid. */ | |
730 | ||
731 | static bool | |
732 | grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) | |
733 | { | |
734 | if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) | |
735 | return false; | |
736 | ||
4f5b9c80 | 737 | dump_user_location_t tloc = target; |
629b3d75 MJ |
738 | grid->target_loc = tloc; |
739 | gimple *stmt | |
740 | = grid_find_single_omp_among_assignments (gimple_omp_body (target), | |
741 | grid, "target"); | |
742 | if (!stmt) | |
743 | return false; | |
744 | gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); | |
745 | tree group_size = NULL; | |
746 | if (!teams) | |
747 | { | |
748 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
749 | GRID_MISSED_MSG_PREFIX "it does not have a sole teams " | |
750 | "construct in it.\n"); | |
751 | return false; | |
752 | } | |
753 | ||
754 | tree clauses = gimple_omp_teams_clauses (teams); | |
755 | while (clauses) | |
756 | { | |
757 | switch (OMP_CLAUSE_CODE (clauses)) | |
758 | { | |
759 | case OMP_CLAUSE_NUM_TEAMS: | |
760 | if (dump_enabled_p ()) | |
761 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
762 | GRID_MISSED_MSG_PREFIX "the teams construct " | |
763 | "contains a num_teams clause\n "); | |
764 | return false; | |
765 | ||
766 | case OMP_CLAUSE_REDUCTION: | |
767 | if (dump_enabled_p ()) | |
768 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
769 | GRID_MISSED_MSG_PREFIX "a reduction " | |
770 | "clause is present\n "); | |
771 | return false; | |
772 | ||
773 | case OMP_CLAUSE_THREAD_LIMIT: | |
774 | if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) | |
775 | group_size = OMP_CLAUSE_OPERAND (clauses, 0); | |
776 | break; | |
777 | ||
778 | default: | |
779 | break; | |
780 | } | |
781 | clauses = OMP_CLAUSE_CHAIN (clauses); | |
782 | } | |
783 | ||
784 | stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, | |
785 | "teams"); | |
786 | if (!stmt) | |
787 | return false; | |
788 | gomp_for *dist = dyn_cast <gomp_for *> (stmt); | |
789 | if (!dist) | |
790 | { | |
791 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
792 | GRID_MISSED_MSG_PREFIX "the teams construct does not " | |
793 | "have a single distribute construct in it.\n"); | |
794 | return false; | |
795 | } | |
796 | ||
797 | gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); | |
798 | ||
799 | grid->collapse = gimple_omp_for_collapse (dist); | |
800 | if (grid->collapse > 3) | |
801 | { | |
802 | if (dump_enabled_p ()) | |
803 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
804 | GRID_MISSED_MSG_PREFIX "the distribute construct " | |
805 | "contains collapse clause with parameter greater " | |
806 | "than 3\n"); | |
807 | return false; | |
808 | } | |
809 | ||
810 | struct omp_for_data fd; | |
811 | struct omp_for_data_loop *dist_loops | |
812 | = (struct omp_for_data_loop *)alloca (grid->collapse | |
813 | * sizeof (struct omp_for_data_loop)); | |
814 | omp_extract_for_data (dist, &fd, dist_loops); | |
815 | if (fd.chunk_size) | |
816 | { | |
817 | if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) | |
818 | { | |
819 | if (dump_enabled_p ()) | |
820 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
821 | GRID_MISSED_MSG_PREFIX "the teams " | |
822 | "thread limit is different from distribute " | |
823 | "schedule chunk\n"); | |
824 | return false; | |
825 | } | |
826 | group_size = fd.chunk_size; | |
827 | } | |
828 | if (group_size && grid->collapse > 1) | |
829 | { | |
830 | if (dump_enabled_p ()) | |
831 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
832 | GRID_MISSED_MSG_PREFIX "group size cannot be " | |
833 | "set using thread_limit or schedule clauses " | |
834 | "when also using a collapse clause greater than 1\n"); | |
835 | return false; | |
836 | } | |
837 | ||
838 | if (gimple_omp_for_combined_p (dist)) | |
839 | { | |
840 | grid->tiling = false; | |
841 | grid->group_sizes[0] = group_size; | |
842 | for (unsigned i = 1; i < grid->collapse; i++) | |
843 | grid->group_sizes[i] = NULL; | |
844 | return grid_dist_follows_simple_pattern (dist, grid); | |
845 | } | |
846 | else | |
847 | { | |
848 | grid->tiling = true; | |
849 | if (group_size) | |
850 | { | |
851 | if (dump_enabled_p ()) | |
852 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, | |
853 | GRID_MISSED_MSG_PREFIX "group size cannot be set " | |
854 | "using thread_limit or schedule clauses when " | |
855 | "distribute and loop constructs do not form " | |
856 | "one combined construct\n"); | |
857 | return false; | |
858 | } | |
859 | for (unsigned i = 0; i < grid->collapse; i++) | |
860 | { | |
861 | if (fd.loops[i].cond_code == GT_EXPR) | |
862 | grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, | |
863 | TREE_TYPE (fd.loops[i].step), | |
864 | fd.loops[i].step); | |
865 | else | |
866 | grid->group_sizes[i] = fd.loops[i].step; | |
867 | } | |
868 | return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, | |
869 | false); | |
870 | } | |
871 | } | |
872 | ||
873 | /* Operand walker, used to remap pre-body declarations according to a hash map | |
874 | provided in DATA. */ | |
875 | ||
876 | static tree | |
877 | grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) | |
878 | { | |
879 | tree t = *tp; | |
880 | ||
881 | if (DECL_P (t) || TYPE_P (t)) | |
882 | *walk_subtrees = 0; | |
883 | else | |
884 | *walk_subtrees = 1; | |
885 | ||
886 | if (VAR_P (t)) | |
887 | { | |
888 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
889 | hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; | |
890 | tree *repl = declmap->get (t); | |
891 | if (repl) | |
892 | *tp = *repl; | |
893 | } | |
894 | return NULL_TREE; | |
895 | } | |
896 | ||
/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};
902 | ||
/* Mark VAR so that it is eventually placed into SEGMENT by attaching the
   corresponding HSA segment attribute to it.  Non-addressable variables are
   left alone and stay private; variables moved to a non-private segment are
   also made static so they outlive the function frame.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making variables non-addressable would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep
     them private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  /* The target segment is communicated to the HSA back end through an
     attribute on the declaration.  */
  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      /* GRID_SEGMENT_PRIVATE is never passed here; callers skip it.  */
      gcc_unreachable ();
    }

  /* Group/global variables must not live on the stack; make the decl static
     and let the varpool finalize it.  */
  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      varpool_node::finalize_decl (var);
    }

}
939 | ||
/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to
   grid_safe_assignment_p or NULL.  If VAR_SEGMENT is not
   GRID_SEGMENT_PRIVATE, also mark all variables in traversed bind
   statements so that they are put into the appropriate segment.

   WI->info must point to a hash_map<tree, tree> that maps original decls to
   their copies; it is both read (to remap operands of copied statements)
   and extended (with each newly created temporary).  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
				     gbind *tgt_bind,
				     enum grid_var_segment var_segment,
				     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into nested binds; the recursion returns the first
	     non-copyable statement found inside, if any.  */
	  gimple *r = grid_copy_leading_local_assignments
	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

	  /* Variables of a traversed bind must end up in the requested
	     segment unless they are to stay private.  */
	  if (var_segment != GRID_SEGMENT_PRIVATE)
	    for (tree var = gimple_bind_vars (bind);
		 var;
		 var = DECL_CHAIN (var))
	      grid_mark_variable_segment (var, var_segment);
	  if (r)
	    return r;
	  else
	    continue;
	}
      /* Stop at the first statement that is not a simple register-type
	 assignment and hand it back to the caller.  */
      if (!grid_safe_assignment_p (stmt, NULL))
	return stmt;
      /* Create a temporary for the LHS, record the mapping, and insert a
	 remapped copy of the assignment before DST.  */
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
				 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}
989 | ||
990 | /* Statement walker function to make adjustments to statements within the | |
991 | gridifed kernel copy. */ | |
992 | ||
993 | static tree | |
994 | grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, | |
995 | struct walk_stmt_info *) | |
996 | { | |
997 | *handled_ops_p = false; | |
998 | gimple *stmt = gsi_stmt (*gsi); | |
999 | if (gimple_code (stmt) == GIMPLE_OMP_FOR | |
1000 | && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) | |
1001 | { | |
1002 | gomp_for *loop = as_a <gomp_for *> (stmt); | |
1003 | tree clauses = gimple_omp_for_clauses (loop); | |
1004 | tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); | |
1005 | if (cl) | |
1006 | OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; | |
1007 | else | |
1008 | { | |
1009 | tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); | |
1010 | OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; | |
1011 | OMP_CLAUSE_CHAIN (c) = clauses; | |
1012 | gimple_omp_for_set_clauses (loop, c); | |
1013 | } | |
1014 | } | |
1015 | return NULL_TREE; | |
1016 | } | |
1017 | ||
1018 | /* Given a PARLOOP that is a normal for looping construct but also a part of a | |
1019 | combined construct with a simd loop, eliminate the simd loop. */ | |
1020 | ||
1021 | static void | |
1022 | grid_eliminate_combined_simd_part (gomp_for *parloop) | |
1023 | { | |
1024 | struct walk_stmt_info wi; | |
1025 | ||
1026 | memset (&wi, 0, sizeof (wi)); | |
1027 | wi.val_only = true; | |
1028 | enum gf_mask msk = GF_OMP_FOR_SIMD; | |
1029 | wi.info = (void *) &msk; | |
1030 | walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); | |
1031 | gimple *stmt = (gimple *) wi.info; | |
1032 | /* We expect that the SIMD id the only statement in the parallel loop. */ | |
1033 | gcc_assert (stmt | |
1034 | && gimple_code (stmt) == GIMPLE_OMP_FOR | |
1035 | && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) | |
1036 | && gimple_omp_for_combined_into_p (stmt) | |
1037 | && !gimple_omp_for_combined_p (stmt)); | |
1038 | gomp_for *simd = as_a <gomp_for *> (stmt); | |
1039 | ||
1040 | /* Copy over the iteration properties because the body refers to the index in | |
1041 | the bottmom-most loop. */ | |
1042 | unsigned i, collapse = gimple_omp_for_collapse (parloop); | |
1043 | gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); | |
1044 | for (i = 0; i < collapse; i++) | |
1045 | { | |
1046 | gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); | |
1047 | gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); | |
1048 | gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); | |
1049 | gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); | |
1050 | } | |
1051 | ||
1052 | tree *tgt= gimple_omp_for_clauses_ptr (parloop); | |
1053 | while (*tgt) | |
1054 | tgt = &OMP_CLAUSE_CHAIN (*tgt); | |
1055 | ||
1056 | /* Copy over all clauses, except for linaer clauses, which are turned into | |
1057 | private clauses, and all other simd-specificl clauses, which are | |
1058 | ignored. */ | |
1059 | tree *pc = gimple_omp_for_clauses_ptr (simd); | |
1060 | while (*pc) | |
1061 | { | |
1062 | tree c = *pc; | |
1063 | switch (TREE_CODE (c)) | |
1064 | { | |
1065 | case OMP_CLAUSE_LINEAR: | |
1066 | { | |
1067 | tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); | |
1068 | OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); | |
1069 | OMP_CLAUSE_CHAIN (priv) = NULL; | |
1070 | *tgt = priv; | |
1071 | tgt = &OMP_CLAUSE_CHAIN (priv); | |
1072 | pc = &OMP_CLAUSE_CHAIN (c); | |
1073 | break; | |
1074 | } | |
1075 | ||
1076 | case OMP_CLAUSE_SAFELEN: | |
1077 | case OMP_CLAUSE_SIMDLEN: | |
1078 | case OMP_CLAUSE_ALIGNED: | |
1079 | pc = &OMP_CLAUSE_CHAIN (c); | |
1080 | break; | |
1081 | ||
1082 | default: | |
1083 | *pc = OMP_CLAUSE_CHAIN (c); | |
1084 | OMP_CLAUSE_CHAIN (c) = NULL; | |
1085 | *tgt = c; | |
1086 | tgt = &OMP_CLAUSE_CHAIN(c); | |
1087 | break; | |
1088 | } | |
1089 | } | |
1090 | ||
1091 | /* Finally, throw away the simd and mark the parallel loop as not | |
1092 | combined. */ | |
1093 | gimple_omp_set_body (parloop, gimple_omp_body (simd)); | |
1094 | gimple_omp_for_set_combined_p (parloop, false); | |
1095 | } | |
1096 | ||
/* Statement walker function marking inner loops as grid loops that represent
   threads of a particular thread group (intra-group), eliminating any
   combined simd part and redirecting lastprivate variables through copies in
   the group memory segment.  WI_IN->info is the enclosing bind to which new
   group-segment variables are appended.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
	grid_eliminate_combined_simd_part (loop);

      /* Force safelen(1) on any simd loops in the body.  */
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
			   grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
	  {
	    /* Create a group-segment copy GV of the lastprivate variable
	       OV.  */
	    push_gimplify_context ();
	    tree ov = OMP_CLAUSE_DECL (c);
	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
				     TREE_TYPE (ov));

	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
	    DECL_CONTEXT (gv) = current_function_decl;
	    gimple_bind_append_vars (bind, gv);
	    /* In the lastprivate finalization sequence, store the value
	       into GV (presumably assign_op's operands are dst, src —
	       matching the copy back below).  */
	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
	    /* ... and after the loop copy GV back into the original OV.  */
	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
	    gimple_seq l = NULL;
	    gimplify_and_add (x, &l);
	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
	    pop_gimplify_context (bind);
	  }
    }
  return NULL_TREE;
}
1142 | ||
/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  Variables of
   traversed binds are placed into the group segment, and each parallel body
   is wrapped in a fresh bind that collects group-segment temporaries created
   while processing the loops inside.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
				      bool *handled_ops_p,
				      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      /* Bind variables at this level belong to the whole thread group.  */
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
	grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      /* Wrap the parallel body in a new bind so grid_mark_tiling_loops has
	 somewhere to append group-segment variables.  */
      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
			   grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    /* The helper may remove the call; propagate that to the walker.  */
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}
1180 | ||
/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignment leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.
   GRID describes the gridification pattern detected earlier; in particular
   GRID->tiling selects between the tiled and the simple code path.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
			       gimple_stmt_iterator *dst,
			       gbind *tgt_bind, struct walk_stmt_info *wi)
{
  /* The first non-assignment statement must be the teams construct.  */
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
						      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  /* ... followed by the distribute construct.  */
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
					      tgt_bind, GRID_SEGMENT_GLOBAL,
					      wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					 GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      /* Tiled case: the distribute loop itself iterates over thread groups
	 and everything below it is processed by the tiling walker.  */
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
			   grid_mark_tiling_parallels_and_loops, NULL,
			   &wi_tiled);
      return dist;
    }
  else
    {
      /* Simple case: distribute and parallel are phony; the innermost loop
	 becomes the grid loop used for dispatch.  */
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
						  tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
						  dst, tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					     GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
	grid_eliminate_combined_simd_part (inner_loop);
      /* Force safelen(1) on any remaining simd loops in the body.  */
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
			   grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}
1248 | ||
/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
				   gimple_stmt_iterator *gsi,
				   gbind *tgt_bind)
{
  /* Requested group sizes are collected per dimension in grid.group_sizes by
     the pattern check below.  */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
		     "Target construct will be turned into a gridified HSA "
		     "kernel\n");

  /* Copy target body to a GPUKERNEL construct: */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  /* Maps original decls to the copies used in the kernel; shared by all the
     remapping walks below via WI.  */
  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
							tgt_bind, &wi);

  /* Link the copied kernel's block into the block tree next to the original
     body's block and append the GRID_BODY to the target.  */
  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  /* Group sizes may refer to pre-body locals; remap them to the temporaries
     created before the target.  */
  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      /* Compute the iteration count of collapsed dimension I, which becomes
	 the grid size of that dimension.  */
      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      omp_adjust_for_condition (loc, &cond_code, &n2);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      tree step
	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));

      /* t = (n2 - n1 + step -/+ 1) / step, i.e. the usual OMP trip count
	 computation, guarded by COND so it is zero for an empty loop.  */
      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
	{
	  /* For tiling, the grid covers the whole iteration space, not just
	     one iteration per group.  */
	  if (cond_code == GT_EXPR)
	    step = fold_build1 (NEGATE_EXPR, itype, step);
	  t = fold_build2 (MULT_EXPR, itype, t, step);
	}

      /* Gimplify the grid size before the target statement.  */
      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      /* Likewise for the group size; zero requests a default size.  */
      tree ws;
      if (grid.group_sizes[i])
	{
	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
	  tmpseq = NULL;
	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
	  if (!gimple_seq_empty_p (tmpseq))
	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
	}
      else
	ws = build_zero_cst (uint32_type_node);

      /* Communicate the dimension sizes to omp-expansion through an
	 artificial _griddim_ clause on the target.  */
      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}
1368 | ||
01914336 | 1369 | /* Walker function doing all the work for create_target_kernels. */ |
629b3d75 MJ |
1370 | |
1371 | static tree | |
1372 | grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, | |
1373 | bool *handled_ops_p, | |
1374 | struct walk_stmt_info *incoming) | |
1375 | { | |
1376 | *handled_ops_p = false; | |
1377 | ||
1378 | gimple *stmt = gsi_stmt (*gsi); | |
1379 | gomp_target *target = dyn_cast <gomp_target *> (stmt); | |
1380 | if (target) | |
1381 | { | |
1382 | gbind *tgt_bind = (gbind *) incoming->info; | |
1383 | gcc_checking_assert (tgt_bind); | |
1384 | grid_attempt_target_gridification (target, gsi, tgt_bind); | |
1385 | return NULL_TREE; | |
1386 | } | |
1387 | gbind *bind = dyn_cast <gbind *> (stmt); | |
1388 | if (bind) | |
1389 | { | |
1390 | *handled_ops_p = true; | |
1391 | struct walk_stmt_info wi; | |
1392 | memset (&wi, 0, sizeof (wi)); | |
1393 | wi.info = bind; | |
1394 | walk_gimple_seq_mod (gimple_bind_body_ptr (bind), | |
1395 | grid_gridify_all_targets_stmt, NULL, &wi); | |
1396 | } | |
1397 | return NULL_TREE; | |
1398 | } | |
1399 | ||
1400 | /* Attempt to gridify all target constructs in BODY_P. All such targets will | |
1401 | have their bodies duplicated, with the new copy being put into a | |
1402 | gimple_omp_grid_body statement. All kernel-related construct within the | |
1403 | grid_body will be marked with phony flags or kernel kinds. Moreover, some | |
1404 | re-structuring is often needed, such as copying pre-bodies before the target | |
1405 | construct so that kernel grid sizes can be computed. */ | |
1406 | ||
1407 | void | |
1408 | omp_grid_gridify_all_targets (gimple_seq *body_p) | |
1409 | { | |
1410 | struct walk_stmt_info wi; | |
1411 | memset (&wi, 0, sizeof (wi)); | |
1412 | walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); | |
1413 | } |