]>
Commit | Line | Data |
---|---|---|
629b3d75 MJ |
1 | /* Lowering and expansion of OpenMP directives for HSA GPU agents. |
2 | ||
8d9254fc | 3 | Copyright (C) 2013-2020 Free Software Foundation, Inc. |
629b3d75 MJ |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "tree-pass.h" | |
28 | #include "ssa.h" | |
29 | #include "cgraph.h" | |
30 | #include "pretty-print.h" | |
31 | #include "fold-const.h" | |
32 | #include "gimplify.h" | |
33 | #include "gimple-iterator.h" | |
34 | #include "gimple-walk.h" | |
35 | #include "tree-inline.h" | |
36 | #include "langhooks.h" | |
37 | #include "omp-general.h" | |
38 | #include "omp-low.h" | |
39 | #include "omp-grid.h" | |
40 | #include "gimple-pretty-print.h" | |
41 | ||
/* Return the lastprivate predicate for a given gridified loop described by
   FD.  The predicate is true for the work-item that executes the last
   iteration in every collapsed dimension.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      /* Intra-group loop: compare position within the work-group.  */
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      /* Otherwise compare the absolute position within the whole grid.  */
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  /* Build the conjunction of (id (dim) + 1 == size (dim)) over all collapsed
     dimensions.  */
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
	= build2 (EQ_EXPR, boolean_type_node,
		  build2 (PLUS_EXPR, unsigned_type_node,
			  build_call_expr (id, 1, dim_tree), u1),
		  build_call_expr (size, 1, dim_tree));
      if (cond)
	cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
	cond = c2;
    }
  return cond;
}
82 | ||
/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

class grid_prop
{
public:
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  At most
     three dimensions are supported (indexed by collapsed loop number).  */
  tree group_sizes[3];
};
102 | ||
103 | #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ | |
104 | "gridified HSA kernel because " | |
105 | ||
106 | /* Return true if STMT is an assignment of a register-type into a local | |
107 | VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to | |
108 | any of the trees specifying group sizes there. */ | |
109 | ||
110 | static bool | |
111 | grid_safe_assignment_p (gimple *stmt, grid_prop *grid) | |
112 | { | |
113 | gassign *assign = dyn_cast <gassign *> (stmt); | |
114 | if (!assign) | |
115 | return false; | |
116 | if (gimple_clobber_p (assign)) | |
117 | return true; | |
118 | tree lhs = gimple_assign_lhs (assign); | |
119 | if (!VAR_P (lhs) | |
120 | || !is_gimple_reg_type (TREE_TYPE (lhs)) | |
121 | || is_global_var (lhs)) | |
122 | return false; | |
123 | if (grid) | |
124 | for (unsigned i = 0; i < grid->collapse; i++) | |
125 | if (lhs == grid->group_sizes[i]) | |
126 | return false; | |
127 | return true; | |
128 | } | |
129 | ||
130 | /* Return true if all statements in SEQ are assignments to local register-type | |
131 | variables that do not hold group size information. */ | |
132 | ||
133 | static bool | |
134 | grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) | |
135 | { | |
136 | if (!seq) | |
137 | return true; | |
138 | ||
139 | gimple_stmt_iterator gsi; | |
140 | for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) | |
141 | if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) | |
142 | return false; | |
143 | return true; | |
144 | } | |
145 | ||
/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement is encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  NAME is only
   used in missed-optimization dump messages.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
					  const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Harmless local assignments are simply skipped.  */
      if (grid_safe_assignment_p (stmt, grid))
	continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Descend into binds; RET is shared so an OMP statement found
	     inside counts towards the single-statement limit.  */
	  gimple_seq bind_body = gimple_bind_body (bind);
	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
							 ret))
	    return false;
	}
      else if (is_gimple_omp (stmt))
	{
	  /* A second OMP statement makes the sequence ungridifiable.  */
	  if (*ret)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "%s construct "
				   "contains multiple OpenMP constructs\n",
				   name);
		  dump_printf_loc (MSG_NOTE, *ret,
				   "The first OpenMP construct within "
				   "a parallel\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "The second OpenMP construct within "
				   "a parallel\n");
		}
	      return false;
	    }
	  *ret = stmt;
	}
      else
	{
	  /* Anything else (control flow, calls, ...) is a "complex"
	     statement that precludes gridification.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "%s construct contains "
			       "a complex statement\n", name);
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
208 | ||
209 | /* Scan statements in SEQ and make sure that it and any binds in it contain | |
210 | only assignments to local register-type variables (that do not overwrite | |
211 | group size information) and one OMP construct. If so, return that | |
212 | construct, otherwise return NULL. GRID describes hitherto discovered | |
213 | properties of the loop that is evaluated for possible gridification. If | |
214 | dumping is enabled and function fails, use NAME to dump a note with the | |
215 | reason for failure. */ | |
216 | ||
217 | static gimple * | |
218 | grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, | |
219 | const char *name) | |
220 | { | |
221 | if (!seq) | |
222 | { | |
223 | if (dump_enabled_p ()) | |
224 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
225 | GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", | |
226 | name); | |
227 | return NULL; | |
228 | } | |
229 | ||
230 | gimple *ret = NULL; | |
231 | if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) | |
232 | { | |
233 | if (!ret && dump_enabled_p ()) | |
234 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
235 | GRID_MISSED_MSG_PREFIX "%s construct does not contain" | |
aa326bfb | 236 | " any other OpenMP construct\n", name); |
629b3d75 MJ |
237 | return ret; |
238 | } | |
239 | else | |
240 | return NULL; | |
241 | } | |
242 | ||
/* Walker function looking for statements there is no point gridifying (and for
   noreturn function calls which we cannot do).  Return non-NULL if such a
   statement is found; the offending statement is also stored in WI->info so
   the caller can point at it in dump messages.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
				   bool *handled_ops_p,
				   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      /* Noreturn calls cannot be expressed in a gridified kernel.  */
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
	{
	  *handled_ops_p = true;
	  wi->info = stmt;
	  /* Returning non-NULL terminates the walk immediately.  */
	  return error_mark_node;
	}
      break;

    /* We may reduce the following list if we find a way to implement the
       clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}
283 | ||
/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostics and return false,
   otherwise return true.  TLOC is the dump location of the enclosing target
   construct, used for the missed-optimization messages.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_THREADS:
	  /* The number of work-items is determined by the grid geometry, so
	     an explicit num_threads request cannot be honored.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "because there is "
			       "a num_threads clause of the parallel "
			       "construct\n");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a num_threads clause\n");
	    }
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "a reduction clause "
			       "is present\n ");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a reduction clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}
327 | ||
328 | /* Examine clauses and the body of omp loop statement GFOR and if something | |
329 | prevents gridification, issue a missed-optimization diagnostics and return | |
01914336 | 330 | false, otherwise return true. GRID describes hitherto discovered properties |
629b3d75 MJ |
331 | of the loop that is evaluated for possible gridification. */ |
332 | ||
333 | static bool | |
334 | grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) | |
335 | { | |
336 | if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), | |
337 | grid)) | |
338 | { | |
339 | if (dump_enabled_p ()) | |
340 | { | |
341 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
342 | GRID_MISSED_MSG_PREFIX "the inner loop " | |
343 | "loop bounds computation contains a complex " | |
344 | "statement\n"); | |
4f5b9c80 | 345 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
346 | "Loop construct cannot be analyzed for " |
347 | "gridification\n"); | |
348 | } | |
349 | return false; | |
350 | } | |
351 | ||
352 | tree clauses = gimple_omp_for_clauses (gfor); | |
353 | while (clauses) | |
354 | { | |
355 | switch (OMP_CLAUSE_CODE (clauses)) | |
356 | { | |
357 | case OMP_CLAUSE_SCHEDULE: | |
358 | if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) | |
359 | { | |
360 | if (dump_enabled_p ()) | |
361 | { | |
362 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
363 | GRID_MISSED_MSG_PREFIX "the inner loop " | |
364 | "has a non-automatic schedule clause\n"); | |
4f5b9c80 | 365 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
366 | "Loop construct has a non automatic " |
367 | "schedule clause\n"); | |
368 | } | |
369 | return false; | |
370 | } | |
371 | break; | |
372 | ||
373 | case OMP_CLAUSE_REDUCTION: | |
374 | if (dump_enabled_p ()) | |
375 | { | |
376 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
377 | GRID_MISSED_MSG_PREFIX "a reduction " | |
378 | "clause is present\n "); | |
4f5b9c80 | 379 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
380 | "Loop construct has a reduction schedule " |
381 | "clause\n"); | |
382 | } | |
383 | return false; | |
384 | ||
385 | default: | |
386 | break; | |
387 | } | |
388 | clauses = OMP_CLAUSE_CHAIN (clauses); | |
389 | } | |
390 | struct walk_stmt_info wi; | |
391 | memset (&wi, 0, sizeof (wi)); | |
392 | if (walk_gimple_seq (gimple_omp_body (gfor), | |
393 | grid_find_ungridifiable_statement, | |
394 | NULL, &wi)) | |
395 | { | |
396 | gimple *bad = (gimple *) wi.info; | |
397 | if (dump_enabled_p ()) | |
398 | { | |
399 | if (is_gimple_call (bad)) | |
400 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
401 | GRID_MISSED_MSG_PREFIX "the inner loop contains " | |
402 | "call to a noreturn function\n"); | |
403 | else | |
404 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
405 | GRID_MISSED_MSG_PREFIX "the inner loop contains " | |
406 | "statement %s which cannot be transformed\n", | |
407 | gimple_code_name[(int) gimple_code (bad)]); | |
4f5b9c80 | 408 | dump_printf_loc (MSG_NOTE, bad, |
629b3d75 MJ |
409 | "This statement cannot be analyzed for " |
410 | "gridification\n"); | |
411 | } | |
412 | return false; | |
413 | } | |
414 | return true; | |
415 | } | |
416 | ||
/* Given distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if it
   can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  /* The distribute body must boil down to a single parallel construct whose
     clauses do not preclude gridification.  */
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
							 grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  /* Likewise, the parallel body must contain a single loop construct.  */
  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
						 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
			 "a simple for loop\n");
      return false;
    }
  /* In a combined construct all involved loops share the collapse factor.  */
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
456 | ||
457 | /* Given an omp loop statement GFOR, return true if it can participate in | |
458 | tiling gridification, i.e. in one where the distribute and parallel for | |
459 | loops do not form a compound statement. GRID describes hitherto discovered | |
01914336 | 460 | properties of the loop that is evaluated for possible gridification. */ |
629b3d75 MJ |
461 | |
462 | static bool | |
463 | grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) | |
464 | { | |
465 | if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) | |
466 | { | |
467 | if (dump_enabled_p ()) | |
468 | { | |
469 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
470 | GRID_MISSED_MSG_PREFIX "an inner loop is not " | |
471 | "a simple for loop\n"); | |
4f5b9c80 | 472 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
473 | "This statement is not a simple for loop\n"); |
474 | } | |
475 | return false; | |
476 | } | |
477 | ||
478 | if (!grid_inner_loop_gridifiable_p (gfor, grid)) | |
479 | return false; | |
480 | ||
481 | if (gimple_omp_for_collapse (gfor) != grid->collapse) | |
482 | { | |
483 | if (dump_enabled_p ()) | |
484 | { | |
485 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
486 | GRID_MISSED_MSG_PREFIX "an inner loop does not " | |
487 | "have use the same collapse clause\n"); | |
4f5b9c80 | 488 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
489 | "Loop construct uses a different collapse clause\n"); |
490 | } | |
491 | return false; | |
492 | } | |
493 | ||
494 | struct omp_for_data fd; | |
495 | struct omp_for_data_loop *loops | |
496 | = (struct omp_for_data_loop *)alloca (grid->collapse | |
497 | * sizeof (struct omp_for_data_loop)); | |
498 | omp_extract_for_data (gfor, &fd, loops); | |
499 | for (unsigned i = 0; i < grid->collapse; i++) | |
500 | { | |
501 | tree itype, type = TREE_TYPE (fd.loops[i].v); | |
502 | if (POINTER_TYPE_P (type)) | |
503 | itype = signed_type_for (type); | |
504 | else | |
505 | itype = type; | |
506 | ||
507 | tree n1 = fold_convert (itype, fd.loops[i].n1); | |
508 | tree n2 = fold_convert (itype, fd.loops[i].n2); | |
509 | tree t = build_int_cst (itype, | |
510 | (fd.loops[i].cond_code == LT_EXPR ? -1 : 1)); | |
511 | t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); | |
512 | t = fold_build2 (PLUS_EXPR, itype, t, n2); | |
513 | t = fold_build2 (MINUS_EXPR, itype, t, n1); | |
514 | if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) | |
515 | t = fold_build2 (TRUNC_DIV_EXPR, itype, | |
516 | fold_build1 (NEGATE_EXPR, itype, t), | |
517 | fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); | |
518 | else | |
519 | t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); | |
520 | ||
521 | if (!operand_equal_p (grid->group_sizes[i], t, 0)) | |
522 | { | |
523 | if (dump_enabled_p ()) | |
524 | { | |
525 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, | |
526 | GRID_MISSED_MSG_PREFIX "the distribute and " | |
527 | "an internal loop do not agree on tile size\n"); | |
4f5b9c80 | 528 | dump_printf_loc (MSG_NOTE, gfor, |
629b3d75 MJ |
529 | "Loop construct does not seem to loop over " |
530 | "a tile size\n"); | |
531 | } | |
532 | return false; | |
533 | } | |
534 | } | |
535 | return true; | |
536 | } | |
537 | ||
538 | /* Facing a call to FNDECL in the body of a distribute construct, return true | |
539 | if we can handle it or false if it precludes gridification. */ | |
540 | ||
541 | static bool | |
542 | grid_call_permissible_in_distribute_p (tree fndecl) | |
543 | { | |
544 | if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) | |
545 | return true; | |
546 | ||
547 | const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); | |
548 | if (strstr (name, "omp_") != name) | |
549 | return false; | |
550 | ||
551 | if ((strcmp (name, "omp_get_thread_num") == 0) | |
552 | || (strcmp (name, "omp_get_num_threads") == 0) | |
553 | || (strcmp (name, "omp_get_num_teams") == 0) | |
554 | || (strcmp (name, "omp_get_team_num") == 0) | |
555 | || (strcmp (name, "omp_get_level") == 0) | |
556 | || (strcmp (name, "omp_get_active_level") == 0) | |
557 | || (strcmp (name, "omp_in_parallel") == 0)) | |
558 | return true; | |
559 | ||
560 | return false; | |
561 | } | |
562 | ||
563 | /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body | |
564 | of a distribute construct that is pointed at by GSI, modify it as necessary | |
565 | for gridification. If the statement itself got removed, return true. */ | |
566 | ||
567 | static bool | |
568 | grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) | |
569 | { | |
570 | gimple *stmt = gsi_stmt (*gsi); | |
571 | tree fndecl = gimple_call_fndecl (stmt); | |
572 | gcc_checking_assert (stmt); | |
573 | if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) | |
574 | return false; | |
575 | ||
576 | const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); | |
577 | if ((strcmp (name, "omp_get_thread_num") == 0) | |
578 | || (strcmp (name, "omp_get_level") == 0) | |
579 | || (strcmp (name, "omp_get_active_level") == 0) | |
580 | || (strcmp (name, "omp_in_parallel") == 0)) | |
581 | { | |
582 | tree lhs = gimple_call_lhs (stmt); | |
583 | if (lhs) | |
584 | { | |
585 | gassign *assign | |
586 | = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); | |
587 | gsi_insert_before (gsi, assign, GSI_SAME_STMT); | |
588 | } | |
589 | gsi_remove (gsi, true); | |
590 | return true; | |
591 | } | |
592 | ||
593 | /* The rest of the omp functions can stay as they are, HSA back-end will | |
594 | handle them correctly. */ | |
595 | gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) | |
596 | || (strcmp (name, "omp_get_num_teams") == 0) | |
597 | || (strcmp (name, "omp_get_team_num") == 0)); | |
598 | return false; | |
599 | } | |
600 | ||
/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if seq is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
				  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Local assignments and plain control flow are harmless here (unlike
	 in the simple pattern, gotos/labels/conds are permitted).  */
      if (grid_safe_assignment_p (stmt, grid)
	  || gimple_code (stmt) == GIMPLE_GOTO
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || gimple_code (stmt) == GIMPLE_COND)
	continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into binds with the same parallel-nesting context.  */
	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
	{
	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the distribute "
				   "construct contains a try..catch region\n");
		  dump_printf_loc (MSG_NOTE, try_stmt,
				   "This statement cannot be analyzed for "
				   "tiled gridification\n");
		}
	      return false;
	    }
	  /* Both the protected body and the cleanup must qualify.  */
	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
						 grid, in_parallel))
	    return false;
	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (is_gimple_call (stmt))
	{
	  /* Only a small whitelist of calls (pure/const or selected OpenMP
	     runtime queries) is permitted here.  */
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
	    continue;

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a call\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
	{
	  /* Parallel constructs may not nest within each other for
	     gridification purposes.  */
	  if (in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a parallel "
				   "construct contains another parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This parallel construct is nested in "
				   "another one\n");
		}
	      return false;
	    }
	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
						    grid, true))
	    return false;
	}
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
	{
	  /* Conversely, a loop construct is only acceptable inside a
	     parallel construct.  */
	  if (!in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a loop "
				   "construct is not nested within a parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This loop construct is not nested in "
				   "a parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
	    return false;
	}
      else
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a complex statement\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
727 | ||
/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  /* The target body must contain a single teams construct.  */
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
					      grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "it does not have a sole "
			 "teams construct in it.\n");
      return false;
    }

  /* Check teams clauses; thread_limit may provide the work-group size.  */
  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_TEAMS:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams construct "
			     "contains a num_teams clause\n ");
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "a reduction "
			     "clause is present\n ");
	  return false;

	case OMP_CLAUSE_THREAD_LIMIT:
	  /* A zero thread_limit is treated as absent.  */
	  if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
	    group_size = OMP_CLAUSE_OPERAND (clauses, 0);
	  break;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  /* The teams body must in turn contain a single distribute construct.  */
  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
						 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the teams construct does not "
			 "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  /* HSA grids have at most three dimensions.  */
  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the distribute construct "
			 "contains collapse clause with parameter greater "
			 "than 3\n");
      return false;
    }

  /* A distribute schedule chunk can also determine the group size, but then
     it must agree with any thread_limit clause.  */
  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
					  * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams "
			     "thread limit is different from distribute "
			     "schedule chunk\n");
	  return false;
	}
      group_size = fd.chunk_size;
    }
  /* An explicit group size only works for a single dimension.  */
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "group size cannot be "
			 "set using thread_limit or schedule clauses "
			 "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      /* Combined distribute-parallel-for: simple (non-tiling) pattern.  */
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
	grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      /* Separate distribute and loop constructs: tiling pattern; the
	 distribute step determines each dimension's group size.  */
      grid->tiling = true;
      if (group_size)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "group size cannot be set "
			     "using thread_limit or schedule clauses when "
			     "distribute and loop constructs do not form "
			     "one combined construct\n");
	  return false;
	}
      for (unsigned i = 0; i < grid->collapse; i++)
	{
	  /* Steps of downward-counting loops are stored negated.  */
	  if (fd.loops[i].cond_code == GT_EXPR)
	    grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
						TREE_TYPE (fd.loops[i].step),
						fd.loops[i].step);
	  else
	    grid->group_sizes[i] = fd.loops[i].step;
	}
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
					       false);
    }
}
875 | ||
876 | /* Operand walker, used to remap pre-body declarations according to a hash map | |
877 | provided in DATA. */ | |
878 | ||
879 | static tree | |
880 | grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) | |
881 | { | |
882 | tree t = *tp; | |
883 | ||
884 | if (DECL_P (t) || TYPE_P (t)) | |
885 | *walk_subtrees = 0; | |
886 | else | |
887 | *walk_subtrees = 1; | |
888 | ||
889 | if (VAR_P (t)) | |
890 | { | |
891 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
892 | hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; | |
893 | tree *repl = declmap->get (t); | |
894 | if (repl) | |
895 | *tp = *repl; | |
896 | } | |
897 | return NULL_TREE; | |
898 | } | |
899 | ||
900 | /* Identifiers of segments into which a particular variable should be places | |
901 | when gridifying. */ | |
902 | ||
903 | enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, | |
904 | GRID_SEGMENT_GLOBAL}; | |
905 | ||
906 | /* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial | |
907 | builtin call into SEQ that will make sure the variable is always considered | |
908 | address taken. */ | |
909 | ||
910 | static void | |
911 | grid_mark_variable_segment (tree var, enum grid_var_segment segment) | |
912 | { | |
913 | /* Making a non-addressable variables would require that we re-gimplify all | |
914 | their uses. Fortunately, we do not have to do this because if they are | |
915 | not addressable, it means they are not used in atomic or parallel | |
916 | statements and so relaxed GPU consistency rules mean we can just keep them | |
01914336 | 917 | private. */ |
629b3d75 MJ |
918 | if (!TREE_ADDRESSABLE (var)) |
919 | return; | |
920 | ||
921 | switch (segment) | |
922 | { | |
923 | case GRID_SEGMENT_GROUP: | |
924 | DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), | |
925 | NULL, DECL_ATTRIBUTES (var)); | |
926 | break; | |
927 | case GRID_SEGMENT_GLOBAL: | |
928 | DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), | |
929 | NULL, DECL_ATTRIBUTES (var)); | |
930 | break; | |
931 | default: | |
932 | gcc_unreachable (); | |
933 | } | |
934 | ||
935 | if (!TREE_STATIC (var)) | |
936 | { | |
937 | TREE_STATIC (var) = 1; | |
09e3944e MJ |
938 | const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var)); |
939 | SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix)); | |
629b3d75 MJ |
940 | varpool_node::finalize_decl (var); |
941 | } | |
942 | ||
943 | } | |
944 | ||
945 | /* Copy leading register-type assignments to local variables in SRC to just | |
946 | before DST, Creating temporaries, adjusting mapping of operands in WI and | |
947 | remapping operands as necessary. Add any new temporaries to TGT_BIND. | |
948 | Return the first statement that does not conform to grid_safe_assignment_p | |
949 | or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all | |
950 | variables in traversed bind statements so that they are put into the | |
951 | appropriate segment. */ | |
952 | ||
953 | static gimple * | |
954 | grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, | |
955 | gbind *tgt_bind, | |
956 | enum grid_var_segment var_segment, | |
957 | struct walk_stmt_info *wi) | |
958 | { | |
959 | hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; | |
960 | gimple_stmt_iterator gsi; | |
961 | for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) | |
962 | { | |
963 | gimple *stmt = gsi_stmt (gsi); | |
964 | if (gbind *bind = dyn_cast <gbind *> (stmt)) | |
965 | { | |
966 | gimple *r = grid_copy_leading_local_assignments | |
967 | (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); | |
968 | ||
969 | if (var_segment != GRID_SEGMENT_PRIVATE) | |
01914336 MJ |
970 | for (tree var = gimple_bind_vars (bind); |
971 | var; | |
972 | var = DECL_CHAIN (var)) | |
629b3d75 MJ |
973 | grid_mark_variable_segment (var, var_segment); |
974 | if (r) | |
975 | return r; | |
976 | else | |
977 | continue; | |
978 | } | |
979 | if (!grid_safe_assignment_p (stmt, NULL)) | |
980 | return stmt; | |
981 | tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); | |
982 | tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), | |
983 | TREE_TYPE (lhs)); | |
984 | DECL_CONTEXT (repl) = current_function_decl; | |
985 | gimple_bind_append_vars (tgt_bind, repl); | |
986 | ||
987 | declmap->put (lhs, repl); | |
988 | gassign *copy = as_a <gassign *> (gimple_copy (stmt)); | |
989 | walk_gimple_op (copy, grid_remap_prebody_decls, wi); | |
990 | gsi_insert_before (dst, copy, GSI_SAME_STMT); | |
991 | } | |
992 | return NULL; | |
993 | } | |
994 | ||
995 | /* Statement walker function to make adjustments to statements within the | |
996 | gridifed kernel copy. */ | |
997 | ||
998 | static tree | |
999 | grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, | |
1000 | struct walk_stmt_info *) | |
1001 | { | |
1002 | *handled_ops_p = false; | |
1003 | gimple *stmt = gsi_stmt (*gsi); | |
1004 | if (gimple_code (stmt) == GIMPLE_OMP_FOR | |
dfa6e5b4 | 1005 | && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD) |
629b3d75 MJ |
1006 | { |
1007 | gomp_for *loop = as_a <gomp_for *> (stmt); | |
1008 | tree clauses = gimple_omp_for_clauses (loop); | |
1009 | tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); | |
1010 | if (cl) | |
1011 | OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; | |
1012 | else | |
1013 | { | |
1014 | tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); | |
1015 | OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; | |
1016 | OMP_CLAUSE_CHAIN (c) = clauses; | |
1017 | gimple_omp_for_set_clauses (loop, c); | |
1018 | } | |
1019 | } | |
1020 | return NULL_TREE; | |
1021 | } | |
1022 | ||
1023 | /* Given a PARLOOP that is a normal for looping construct but also a part of a | |
1024 | combined construct with a simd loop, eliminate the simd loop. */ | |
1025 | ||
1026 | static void | |
1027 | grid_eliminate_combined_simd_part (gomp_for *parloop) | |
1028 | { | |
1029 | struct walk_stmt_info wi; | |
1030 | ||
1031 | memset (&wi, 0, sizeof (wi)); | |
1032 | wi.val_only = true; | |
dfa6e5b4 | 1033 | enum gf_mask msk = GF_OMP_FOR_KIND_SIMD; |
629b3d75 MJ |
1034 | wi.info = (void *) &msk; |
1035 | walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); | |
1036 | gimple *stmt = (gimple *) wi.info; | |
1037 | /* We expect that the SIMD id the only statement in the parallel loop. */ | |
1038 | gcc_assert (stmt | |
1039 | && gimple_code (stmt) == GIMPLE_OMP_FOR | |
dfa6e5b4 | 1040 | && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD) |
629b3d75 MJ |
1041 | && gimple_omp_for_combined_into_p (stmt) |
1042 | && !gimple_omp_for_combined_p (stmt)); | |
1043 | gomp_for *simd = as_a <gomp_for *> (stmt); | |
1044 | ||
1045 | /* Copy over the iteration properties because the body refers to the index in | |
1046 | the bottmom-most loop. */ | |
1047 | unsigned i, collapse = gimple_omp_for_collapse (parloop); | |
1048 | gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); | |
1049 | for (i = 0; i < collapse; i++) | |
1050 | { | |
1051 | gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); | |
1052 | gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); | |
1053 | gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); | |
1054 | gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); | |
1055 | } | |
1056 | ||
1057 | tree *tgt= gimple_omp_for_clauses_ptr (parloop); | |
1058 | while (*tgt) | |
1059 | tgt = &OMP_CLAUSE_CHAIN (*tgt); | |
1060 | ||
28567c40 JJ |
1061 | /* Copy over all clauses, except for linear clauses, which are turned into |
1062 | private clauses, and all other simd-specific clauses, which are | |
629b3d75 MJ |
1063 | ignored. */ |
1064 | tree *pc = gimple_omp_for_clauses_ptr (simd); | |
1065 | while (*pc) | |
1066 | { | |
1067 | tree c = *pc; | |
4ed1ff7e | 1068 | switch (OMP_CLAUSE_CODE (c)) |
629b3d75 MJ |
1069 | { |
1070 | case OMP_CLAUSE_LINEAR: | |
1071 | { | |
1072 | tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); | |
1073 | OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); | |
1074 | OMP_CLAUSE_CHAIN (priv) = NULL; | |
1075 | *tgt = priv; | |
1076 | tgt = &OMP_CLAUSE_CHAIN (priv); | |
1077 | pc = &OMP_CLAUSE_CHAIN (c); | |
1078 | break; | |
1079 | } | |
1080 | ||
1081 | case OMP_CLAUSE_SAFELEN: | |
1082 | case OMP_CLAUSE_SIMDLEN: | |
1083 | case OMP_CLAUSE_ALIGNED: | |
1084 | pc = &OMP_CLAUSE_CHAIN (c); | |
1085 | break; | |
1086 | ||
1087 | default: | |
1088 | *pc = OMP_CLAUSE_CHAIN (c); | |
1089 | OMP_CLAUSE_CHAIN (c) = NULL; | |
1090 | *tgt = c; | |
28567c40 | 1091 | tgt = &OMP_CLAUSE_CHAIN (c); |
629b3d75 MJ |
1092 | break; |
1093 | } | |
1094 | } | |
1095 | ||
1096 | /* Finally, throw away the simd and mark the parallel loop as not | |
1097 | combined. */ | |
1098 | gimple_omp_set_body (parloop, gimple_omp_body (simd)); | |
1099 | gimple_omp_for_set_combined_p (parloop, false); | |
1100 | } | |
1101 | ||
1102 | /* Statement walker function marking all parallels as grid_phony and loops as | |
1103 | grid ones representing threads of a particular thread group. */ | |
1104 | ||
1105 | static tree | |
1106 | grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, | |
1107 | struct walk_stmt_info *wi_in) | |
1108 | { | |
1109 | *handled_ops_p = false; | |
1110 | if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) | |
1111 | { | |
1112 | *handled_ops_p = true; | |
1113 | gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); | |
1114 | gimple_omp_for_set_grid_intra_group (loop, true); | |
1115 | if (gimple_omp_for_combined_p (loop)) | |
1116 | grid_eliminate_combined_simd_part (loop); | |
1117 | ||
1118 | struct walk_stmt_info body_wi; | |
1119 | memset (&body_wi, 0, sizeof (body_wi)); | |
1120 | walk_gimple_seq_mod (gimple_omp_body_ptr (loop), | |
1121 | grid_process_grid_body, NULL, &body_wi); | |
1122 | ||
1123 | gbind *bind = (gbind *) wi_in->info; | |
1124 | tree c; | |
1125 | for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) | |
1126 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) | |
1127 | { | |
1128 | push_gimplify_context (); | |
1129 | tree ov = OMP_CLAUSE_DECL (c); | |
1130 | tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), | |
1131 | TREE_TYPE (ov)); | |
1132 | ||
1133 | grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); | |
1134 | DECL_CONTEXT (gv) = current_function_decl; | |
1135 | gimple_bind_append_vars (bind, gv); | |
1136 | tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); | |
1137 | gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); | |
1138 | x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); | |
1139 | gimple_seq l = NULL; | |
1140 | gimplify_and_add (x, &l); | |
1141 | gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); | |
1142 | pop_gimplify_context (bind); | |
1143 | } | |
1144 | } | |
1145 | return NULL_TREE; | |
1146 | } | |
1147 | ||
1148 | /* Statement walker function marking all parallels as grid_phony and loops as | |
1149 | grid ones representing threads of a particular thread group. */ | |
1150 | ||
1151 | static tree | |
1152 | grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, | |
1153 | bool *handled_ops_p, | |
1154 | struct walk_stmt_info *wi_in) | |
1155 | { | |
1156 | *handled_ops_p = false; | |
1157 | wi_in->removed_stmt = false; | |
1158 | gimple *stmt = gsi_stmt (*gsi); | |
1159 | if (gbind *bind = dyn_cast <gbind *> (stmt)) | |
1160 | { | |
1161 | for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) | |
1162 | grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); | |
1163 | } | |
1164 | else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) | |
1165 | { | |
1166 | *handled_ops_p = true; | |
1167 | gimple_omp_parallel_set_grid_phony (parallel, true); | |
1168 | ||
1169 | gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); | |
1170 | gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); | |
1171 | gimple_seq s = NULL; | |
1172 | gimple_seq_add_stmt (&s, new_bind); | |
1173 | gimple_omp_set_body (parallel, s); | |
1174 | ||
1175 | struct walk_stmt_info wi_par; | |
1176 | memset (&wi_par, 0, sizeof (wi_par)); | |
1177 | wi_par.info = new_bind; | |
1178 | walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), | |
1179 | grid_mark_tiling_loops, NULL, &wi_par); | |
1180 | } | |
1181 | else if (is_a <gcall *> (stmt)) | |
1182 | wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); | |
1183 | return NULL_TREE; | |
1184 | } | |
1185 | ||
1186 | /* Given freshly copied top level kernel SEQ, identify the individual OMP | |
1187 | components, mark them as part of kernel, copy assignment leading to them | |
1188 | just before DST, remapping them using WI and adding new temporaries to | |
700d4cb0 | 1189 | TGT_BIND, and return the loop that will be used for kernel dispatch. */ |
629b3d75 MJ |
1190 | |
1191 | static gomp_for * | |
1192 | grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, | |
1193 | gimple_stmt_iterator *dst, | |
1194 | gbind *tgt_bind, struct walk_stmt_info *wi) | |
1195 | { | |
1196 | gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, | |
1197 | GRID_SEGMENT_GLOBAL, wi); | |
1198 | gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); | |
1199 | gcc_assert (teams); | |
1200 | gimple_omp_teams_set_grid_phony (teams, true); | |
1201 | stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, | |
01914336 MJ |
1202 | tgt_bind, GRID_SEGMENT_GLOBAL, |
1203 | wi); | |
629b3d75 MJ |
1204 | gcc_checking_assert (stmt); |
1205 | gomp_for *dist = dyn_cast <gomp_for *> (stmt); | |
1206 | gcc_assert (dist); | |
1207 | gimple_seq prebody = gimple_omp_for_pre_body (dist); | |
1208 | if (prebody) | |
1209 | grid_copy_leading_local_assignments (prebody, dst, tgt_bind, | |
1210 | GRID_SEGMENT_GROUP, wi); | |
1211 | ||
1212 | if (grid->tiling) | |
1213 | { | |
1214 | gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); | |
1215 | gimple_omp_for_set_grid_group_iter (dist, true); | |
1216 | ||
1217 | struct walk_stmt_info wi_tiled; | |
1218 | memset (&wi_tiled, 0, sizeof (wi_tiled)); | |
1219 | walk_gimple_seq_mod (gimple_omp_body_ptr (dist), | |
1220 | grid_mark_tiling_parallels_and_loops, NULL, | |
1221 | &wi_tiled); | |
1222 | return dist; | |
1223 | } | |
1224 | else | |
1225 | { | |
1226 | gimple_omp_for_set_grid_phony (dist, true); | |
1227 | stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, | |
1228 | tgt_bind, | |
1229 | GRID_SEGMENT_PRIVATE, wi); | |
1230 | gcc_checking_assert (stmt); | |
1231 | gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); | |
1232 | gimple_omp_parallel_set_grid_phony (parallel, true); | |
1233 | stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), | |
1234 | dst, tgt_bind, | |
1235 | GRID_SEGMENT_PRIVATE, wi); | |
1236 | gomp_for *inner_loop = as_a <gomp_for *> (stmt); | |
1237 | gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); | |
1238 | prebody = gimple_omp_for_pre_body (inner_loop); | |
1239 | if (prebody) | |
1240 | grid_copy_leading_local_assignments (prebody, dst, tgt_bind, | |
1241 | GRID_SEGMENT_PRIVATE, wi); | |
1242 | ||
1243 | if (gimple_omp_for_combined_p (inner_loop)) | |
1244 | grid_eliminate_combined_simd_part (inner_loop); | |
5de73c05 | 1245 | struct walk_stmt_info body_wi; |
629b3d75 MJ |
1246 | memset (&body_wi, 0, sizeof (body_wi)); |
1247 | walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), | |
1248 | grid_process_grid_body, NULL, &body_wi); | |
1249 | ||
1250 | return inner_loop; | |
1251 | } | |
1252 | } | |
1253 | ||
1254 | /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, | |
1255 | create a GPU kernel for it. GSI must point to the same statement, TGT_BIND | |
1256 | is the bind into which temporaries inserted before TARGET should be | |
1257 | added. */ | |
1258 | ||
1259 | static void | |
1260 | grid_attempt_target_gridification (gomp_target *target, | |
1261 | gimple_stmt_iterator *gsi, | |
1262 | gbind *tgt_bind) | |
1263 | { | |
1264 | /* removed group_size */ | |
4f5b9c80 | 1265 | grid_prop grid = {}; |
629b3d75 MJ |
1266 | if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) |
1267 | return; | |
1268 | ||
1269 | location_t loc = gimple_location (target); | |
1270 | if (dump_enabled_p ()) | |
4f5b9c80 | 1271 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target, |
629b3d75 MJ |
1272 | "Target construct will be turned into a gridified HSA " |
1273 | "kernel\n"); | |
1274 | ||
1275 | /* Copy target body to a GPUKERNEL construct: */ | |
1276 | gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals | |
1277 | (gimple_omp_body (target)); | |
1278 | ||
1279 | hash_map<tree, tree> *declmap = new hash_map<tree, tree>; | |
1280 | struct walk_stmt_info wi; | |
1281 | memset (&wi, 0, sizeof (struct walk_stmt_info)); | |
1282 | wi.info = declmap; | |
1283 | ||
1284 | /* Copy assignments in between OMP statements before target, mark OMP | |
1285 | statements within copy appropriately. */ | |
1286 | gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, | |
1287 | tgt_bind, &wi); | |
1288 | ||
01914336 MJ |
1289 | gbind *old_bind |
1290 | = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); | |
629b3d75 MJ |
1291 | gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); |
1292 | tree new_block = gimple_bind_block (new_bind); | |
1293 | tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); | |
1294 | BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); | |
1295 | BLOCK_SUBBLOCKS (enc_block) = new_block; | |
1296 | BLOCK_SUPERCONTEXT (new_block) = enc_block; | |
1297 | gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); | |
1298 | gimple_seq_add_stmt | |
1299 | (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), | |
1300 | gpukernel); | |
1301 | ||
1302 | for (size_t i = 0; i < grid.collapse; i++) | |
1303 | walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); | |
1304 | push_gimplify_context (); | |
1305 | for (size_t i = 0; i < grid.collapse; i++) | |
1306 | { | |
031c5c8b MJ |
1307 | tree index_var = gimple_omp_for_index (inner_loop, i); |
1308 | tree itype, type = TREE_TYPE (index_var); | |
629b3d75 MJ |
1309 | if (POINTER_TYPE_P (type)) |
1310 | itype = signed_type_for (type); | |
1311 | else | |
1312 | itype = type; | |
1313 | ||
1314 | enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); | |
1315 | tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); | |
1316 | walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); | |
1317 | tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); | |
1318 | walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); | |
031c5c8b MJ |
1319 | tree step |
1320 | = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); | |
1321 | omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step); | |
629b3d75 MJ |
1322 | n1 = fold_convert (itype, n1); |
1323 | n2 = fold_convert (itype, n2); | |
1324 | ||
791929c9 | 1325 | tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); |
629b3d75 MJ |
1326 | |
1327 | tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1)); | |
1328 | t = fold_build2 (PLUS_EXPR, itype, step, t); | |
1329 | t = fold_build2 (PLUS_EXPR, itype, t, n2); | |
1330 | t = fold_build2 (MINUS_EXPR, itype, t, n1); | |
1331 | if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) | |
1332 | t = fold_build2 (TRUNC_DIV_EXPR, itype, | |
1333 | fold_build1 (NEGATE_EXPR, itype, t), | |
1334 | fold_build1 (NEGATE_EXPR, itype, step)); | |
1335 | else | |
1336 | t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); | |
791929c9 | 1337 | t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); |
629b3d75 | 1338 | if (grid.tiling) |
01914336 MJ |
1339 | { |
1340 | if (cond_code == GT_EXPR) | |
1341 | step = fold_build1 (NEGATE_EXPR, itype, step); | |
1342 | t = fold_build2 (MULT_EXPR, itype, t, step); | |
1343 | } | |
629b3d75 MJ |
1344 | |
1345 | tree gs = fold_convert (uint32_type_node, t); | |
1346 | gimple_seq tmpseq = NULL; | |
1347 | gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); | |
1348 | if (!gimple_seq_empty_p (tmpseq)) | |
1349 | gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); | |
1350 | ||
1351 | tree ws; | |
1352 | if (grid.group_sizes[i]) | |
1353 | { | |
1354 | ws = fold_convert (uint32_type_node, grid.group_sizes[i]); | |
1355 | tmpseq = NULL; | |
1356 | gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); | |
1357 | if (!gimple_seq_empty_p (tmpseq)) | |
1358 | gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); | |
1359 | } | |
1360 | else | |
1361 | ws = build_zero_cst (uint32_type_node); | |
1362 | ||
1363 | tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); | |
1364 | OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; | |
1365 | OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; | |
1366 | OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; | |
1367 | OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); | |
1368 | gimple_omp_target_set_clauses (target, c); | |
1369 | } | |
1370 | pop_gimplify_context (tgt_bind); | |
1371 | delete declmap; | |
1372 | return; | |
1373 | } | |
1374 | ||
01914336 | 1375 | /* Walker function doing all the work for create_target_kernels. */ |
629b3d75 MJ |
1376 | |
1377 | static tree | |
1378 | grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, | |
1379 | bool *handled_ops_p, | |
1380 | struct walk_stmt_info *incoming) | |
1381 | { | |
1382 | *handled_ops_p = false; | |
1383 | ||
1384 | gimple *stmt = gsi_stmt (*gsi); | |
1385 | gomp_target *target = dyn_cast <gomp_target *> (stmt); | |
1386 | if (target) | |
1387 | { | |
1388 | gbind *tgt_bind = (gbind *) incoming->info; | |
1389 | gcc_checking_assert (tgt_bind); | |
1390 | grid_attempt_target_gridification (target, gsi, tgt_bind); | |
1391 | return NULL_TREE; | |
1392 | } | |
1393 | gbind *bind = dyn_cast <gbind *> (stmt); | |
1394 | if (bind) | |
1395 | { | |
1396 | *handled_ops_p = true; | |
1397 | struct walk_stmt_info wi; | |
1398 | memset (&wi, 0, sizeof (wi)); | |
1399 | wi.info = bind; | |
1400 | walk_gimple_seq_mod (gimple_bind_body_ptr (bind), | |
1401 | grid_gridify_all_targets_stmt, NULL, &wi); | |
1402 | } | |
1403 | return NULL_TREE; | |
1404 | } | |
1405 | ||
1406 | /* Attempt to gridify all target constructs in BODY_P. All such targets will | |
1407 | have their bodies duplicated, with the new copy being put into a | |
1408 | gimple_omp_grid_body statement. All kernel-related construct within the | |
1409 | grid_body will be marked with phony flags or kernel kinds. Moreover, some | |
1410 | re-structuring is often needed, such as copying pre-bodies before the target | |
1411 | construct so that kernel grid sizes can be computed. */ | |
1412 | ||
1413 | void | |
1414 | omp_grid_gridify_all_targets (gimple_seq *body_p) | |
1415 | { | |
1416 | struct walk_stmt_info wi; | |
1417 | memset (&wi, 0, sizeof (wi)); | |
1418 | walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); | |
1419 | } |