1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "cgraph.h"
30 #include "pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-walk.h"
35 #include "tree-inline.h"
36 #include "langhooks.h"
37 #include "omp-general.h"
38 #include "omp-low.h"
39 #include "omp-grid.h"
40 #include "gimple-pretty-print.h"
41
42 /* Return the lastprivate predicate for a given gridified loop described by
43    FD.  */
44
45 tree
46 omp_grid_lastprivate_predicate (struct omp_for_data *fd)
47 {
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49    iteration variables, but they are not actually captured in FD.
50 Fortunately, we can easily rely on HSA builtins to get this
51 information. */
52
53 tree id, size;
54 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd->for_stmt))
56 {
57 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
58 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
59 }
60 else
61 {
62 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
63 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
64 }
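  /* The predicate built below holds for the work-item executing the last
     iteration, i.e. the one for which id + 1 == size in every collapsed
     dimension, with ID and SIZE being the HSA builtins selected above.  */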
65 tree cond = NULL;
66 for (int dim = 0; dim < fd->collapse; dim++)
67 {
68 tree dim_tree = build_int_cstu (unsigned_type_node, dim);
69 tree u1 = build_int_cstu (unsigned_type_node, 1);
70 tree c2
71 = build2 (EQ_EXPR, boolean_type_node,
72 build2 (PLUS_EXPR, unsigned_type_node,
73 build_call_expr (id, 1, dim_tree), u1),
74 build_call_expr (size, 1, dim_tree));
75 if (cond)
76 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
77 else
78 cond = c2;
79 }
80 return cond;
81 }
82
83 /* Structure describing the basic properties of the loop we are analyzing,
84    both when deciding whether it can be gridified and when it is gridified. */
85
86 struct grid_prop
87 {
88 /* True when we are doing tiling gridification, i.e. when there is a distinct
89 distribute loop over groups and a loop construct over work-items. False
90 when distribute and parallel for loops form a combined construct. */
91 bool tiling;
92 /* Location of the target construct for optimization information
93 messages. */
94 dump_user_location_t target_loc;
95 /* The collapse clause of the involved loops. Collapse value of all of them
96 must be the same for gridification to take place. */
97 size_t collapse;
98 /* Group sizes if requested by the user, or NULL if not requested. */
99 tree group_sizes[3];
100 };
101
102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103 "gridified HSA kernel because "
104
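/* For orientation: gridification looks for target regions that, apart from
   local register-type assignments, roughly follow this shape (a sketch of the
   accepted pattern, not an exhaustive description):

     #pragma omp target
     #pragma omp teams [thread_limit(E)]
     #pragma omp distribute [collapse(C)]	(C <= 3)
     #pragma omp parallel for [collapse(C)]
     for (...)
       ...

   either with the distribute and the loop forming one combined construct (the
   simple pattern) or with a separate distribute loop over groups whose body
   contains parallel loops over the work-items of each group (the tiling
   pattern).  */
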
105 /* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
108
109 static bool
110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
111 {
112 gassign *assign = dyn_cast <gassign *> (stmt);
113 if (!assign)
114 return false;
115 if (gimple_clobber_p (assign))
116 return true;
117 tree lhs = gimple_assign_lhs (assign);
118 if (!VAR_P (lhs)
119 || !is_gimple_reg_type (TREE_TYPE (lhs))
120 || is_global_var (lhs))
121 return false;
122 if (grid)
123 for (unsigned i = 0; i < grid->collapse; i++)
124 if (lhs == grid->group_sizes[i])
125 return false;
126 return true;
127 }
128
129 /* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
131
132 static bool
133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
134 {
135 if (!seq)
136 return true;
137
138 gimple_stmt_iterator gsi;
139 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
141 return false;
142 return true;
143 }
144
145 /* Scan statements in SEQ and call itself recursively on any bind. GRID
146    describes hitherto discovered properties of the loop that is evaluated for
147    possible gridification. If during the whole search only assignments to
148    register-type local variables (that do not overwrite group size information)
149    and a single OMP statement are encountered, return true, otherwise return
150    false. RET is where we store any OMP statement encountered. */
151
152 static bool
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
154 const char *name, gimple **ret)
155 {
156 gimple_stmt_iterator gsi;
157 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
158 {
159 gimple *stmt = gsi_stmt (gsi);
160
161 if (grid_safe_assignment_p (stmt, grid))
162 continue;
163 if (gbind *bind = dyn_cast <gbind *> (stmt))
164 {
165 gimple_seq bind_body = gimple_bind_body (bind);
166 if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
167 ret))
168 return false;
169 }
170 else if (is_gimple_omp (stmt))
171 {
172 if (*ret)
173 {
174 if (dump_enabled_p ())
175 {
176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
177 GRID_MISSED_MSG_PREFIX "%s construct "
178 "contains multiple OpenMP constructs\n",
179 name);
180 dump_printf_loc (MSG_NOTE, *ret,
181 "The first OpenMP construct within "
182 "a parallel\n");
183 dump_printf_loc (MSG_NOTE, stmt,
184 "The second OpenMP construct within "
185 "a parallel\n");
186 }
187 return false;
188 }
189 *ret = stmt;
190 }
191 else
192 {
193 if (dump_enabled_p ())
194 {
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
196 GRID_MISSED_MSG_PREFIX "%s construct contains "
197 "a complex statement\n", name);
198 dump_printf_loc (MSG_NOTE, stmt,
199 "This statement cannot be analyzed for "
200 "gridification\n");
201 }
202 return false;
203 }
204 }
205 return true;
206 }
207
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213    dumping is enabled and the function fails, use NAME to dump a note with the
214 reason for failure. */
215
216 static gimple *
217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
218 const char *name)
219 {
220 if (!seq)
221 {
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
224 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
225 name);
226 return NULL;
227 }
228
229 gimple *ret = NULL;
230 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
231 {
232 if (!ret && dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
234 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
235 " any other OpenMP construct\n", name);
236 return ret;
237 }
238 else
239 return NULL;
240 }
241
242 /* Walker function looking for statements which there is no point in gridifying
243    (and for noreturn function calls which we cannot handle). Return non-NULL
244    if such a statement is found. */
245
246 static tree
247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
248 bool *handled_ops_p,
249 struct walk_stmt_info *wi)
250 {
251 *handled_ops_p = false;
252 gimple *stmt = gsi_stmt (*gsi);
253 switch (gimple_code (stmt))
254 {
255 case GIMPLE_CALL:
256 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
257 {
258 *handled_ops_p = true;
259 wi->info = stmt;
260 return error_mark_node;
261 }
262 break;
263
264 /* We may reduce the following list if we find a way to implement the
265    clauses, but for now there is no point in trying further. */
266 case GIMPLE_OMP_CRITICAL:
267 case GIMPLE_OMP_TASKGROUP:
268 case GIMPLE_OMP_TASK:
269 case GIMPLE_OMP_SECTION:
270 case GIMPLE_OMP_SECTIONS:
271 case GIMPLE_OMP_SECTIONS_SWITCH:
272 case GIMPLE_OMP_TARGET:
273 case GIMPLE_OMP_ORDERED:
274 *handled_ops_p = true;
275 wi->info = stmt;
276 return error_mark_node;
277 default:
278 break;
279 }
280 return NULL;
281 }
282
283 /* Examine clauses of omp parallel statement PAR and if any of them prevents
284    gridification, issue a missed-optimization diagnostic and return false,
285    otherwise return true. TLOC is the location of the target construct, used
286    for optimization-note messages. */
287
288 static bool
289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
290 {
291 tree clauses = gimple_omp_parallel_clauses (par);
292 while (clauses)
293 {
294 switch (OMP_CLAUSE_CODE (clauses))
295 {
296 case OMP_CLAUSE_NUM_THREADS:
297 if (dump_enabled_p ())
298 {
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
300 GRID_MISSED_MSG_PREFIX "there is "
301 "a num_threads clause of the parallel "
302 "construct\n");
303 dump_printf_loc (MSG_NOTE, par,
304 "Parallel construct has a num_threads clause\n");
305 }
306 return false;
307
308 case OMP_CLAUSE_REDUCTION:
309 if (dump_enabled_p ())
310 {
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
312 GRID_MISSED_MSG_PREFIX "a reduction clause "
313 "is present\n ");
314 dump_printf_loc (MSG_NOTE, par,
315 "Parallel construct has a reduction clause\n");
316 }
317 return false;
318
319 default:
320 break;
321 }
322 clauses = OMP_CLAUSE_CHAIN (clauses);
323 }
324 return true;
325 }
326
327 /* Examine clauses and the body of omp loop statement GFOR and if something
328    prevents gridification, issue a missed-optimization diagnostic and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
331
332 static bool
333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
334 {
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
336 grid))
337 {
338 if (dump_enabled_p ())
339 {
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
341 GRID_MISSED_MSG_PREFIX "the inner loop "
342 "bounds computation contains a complex "
343 "statement\n");
344 dump_printf_loc (MSG_NOTE, gfor,
345 "Loop construct cannot be analyzed for "
346 "gridification\n");
347 }
348 return false;
349 }
350
351 tree clauses = gimple_omp_for_clauses (gfor);
352 while (clauses)
353 {
354 switch (OMP_CLAUSE_CODE (clauses))
355 {
356 case OMP_CLAUSE_SCHEDULE:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
358 {
359 if (dump_enabled_p ())
360 {
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
362 GRID_MISSED_MSG_PREFIX "the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE, gfor,
365 "Loop construct has a non automatic "
366 "schedule clause\n");
367 }
368 return false;
369 }
370 break;
371
372 case OMP_CLAUSE_REDUCTION:
373 if (dump_enabled_p ())
374 {
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
376 GRID_MISSED_MSG_PREFIX "a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE, gfor,
379 "Loop construct has a reduction schedule "
380 "clause\n");
381 }
382 return false;
383
384 default:
385 break;
386 }
387 clauses = OMP_CLAUSE_CHAIN (clauses);
388 }
389 struct walk_stmt_info wi;
390 memset (&wi, 0, sizeof (wi));
391 if (walk_gimple_seq (gimple_omp_body (gfor),
392 grid_find_ungridifiable_statement,
393 NULL, &wi))
394 {
395 gimple *bad = (gimple *) wi.info;
396 if (dump_enabled_p ())
397 {
398 if (is_gimple_call (bad))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
400 GRID_MISSED_MSG_PREFIX "the inner loop contains "
401 "call to a noreturn function\n");
402 else
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
404 GRID_MISSED_MSG_PREFIX "the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name[(int) gimple_code (bad)]);
407 dump_printf_loc (MSG_NOTE, bad,
408 "This statement cannot be analyzed for "
409 "gridification\n");
410 }
411 return false;
412 }
413 return true;
414 }
415
416 /* Given a distribute omp construct represented by DIST, which in the original
417 source forms a compound construct with a looping construct, return true if it
418 can be turned into a gridified HSA kernel. Otherwise return false. GRID
419 describes hitherto discovered properties of the loop that is evaluated for
420 possible gridification. */
421
422 static bool
423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
424 {
425 dump_user_location_t tloc = grid->target_loc;
426 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
427 grid, "distribute");
428 gomp_parallel *par;
429 if (!stmt
430 || !(par = dyn_cast <gomp_parallel *> (stmt))
431 || !grid_parallel_clauses_gridifiable (par, tloc))
432 return false;
433
434 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
435 "parallel");
436 gomp_for *gfor;
437 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
438 return false;
439
440 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
441 {
442 if (dump_enabled_p ())
443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
444 GRID_MISSED_MSG_PREFIX "the inner loop is not "
445 "a simple for loop\n");
446 return false;
447 }
448 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
449
450 if (!grid_inner_loop_gridifiable_p (gfor, grid))
451 return false;
452
453 return true;
454 }
455
456 /* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
460
461 static bool
462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
463 {
464 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
465 {
466 if (dump_enabled_p ())
467 {
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
469 GRID_MISSED_MSG_PREFIX "an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE, gfor,
472 "This statement is not a simple for loop\n");
473 }
474 return false;
475 }
476
477 if (!grid_inner_loop_gridifiable_p (gfor, grid))
478 return false;
479
480 if (gimple_omp_for_collapse (gfor) != grid->collapse)
481 {
482 if (dump_enabled_p ())
483 {
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
485 GRID_MISSED_MSG_PREFIX "an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE, gfor,
488 "Loop construct uses a different collapse clause\n");
489 }
490 return false;
491 }
492
493 struct omp_for_data fd;
494 struct omp_for_data_loop *loops
495 = (struct omp_for_data_loop *)alloca (grid->collapse
496 * sizeof (struct omp_for_data_loop));
497 omp_extract_for_data (gfor, &fd, loops);
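  /* For each collapsed dimension, compute the number of iterations of the
     inner loop and require that it matches the step of the corresponding
     distribute loop, i.e. the tile size stored in GRID->group_sizes.  */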
498 for (unsigned i = 0; i < grid->collapse; i++)
499 {
500 tree itype, type = TREE_TYPE (fd.loops[i].v);
501 if (POINTER_TYPE_P (type))
502 itype = signed_type_for (type);
503 else
504 itype = type;
505
506 tree n1 = fold_convert (itype, fd.loops[i].n1);
507 tree n2 = fold_convert (itype, fd.loops[i].n2);
508 tree t = build_int_cst (itype,
509 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
510 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
511 t = fold_build2 (PLUS_EXPR, itype, t, n2);
512 t = fold_build2 (MINUS_EXPR, itype, t, n1);
513 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
514 t = fold_build2 (TRUNC_DIV_EXPR, itype,
515 fold_build1 (NEGATE_EXPR, itype, t),
516 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
517 else
518 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
519
520 if (!operand_equal_p (grid->group_sizes[i], t, 0))
521 {
522 if (dump_enabled_p ())
523 {
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
525 GRID_MISSED_MSG_PREFIX "the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE, gfor,
528 "Loop construct does not seem to loop over "
529 "a tile size\n");
530 }
531 return false;
532 }
533 }
534 return true;
535 }
536
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
539
540 static bool
541 grid_call_permissible_in_distribute_p (tree fndecl)
542 {
543 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
544 return true;
545
546 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
547 if (strstr (name, "omp_") != name)
548 return false;
549
550 if ((strcmp (name, "omp_get_thread_num") == 0)
551 || (strcmp (name, "omp_get_num_threads") == 0)
552 || (strcmp (name, "omp_get_num_teams") == 0)
553 || (strcmp (name, "omp_get_team_num") == 0)
554 || (strcmp (name, "omp_get_level") == 0)
555 || (strcmp (name, "omp_get_active_level") == 0)
556 || (strcmp (name, "omp_in_parallel") == 0))
557 return true;
558
559 return false;
560 }
561
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
565
566 static bool
567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
568 {
569 gimple *stmt = gsi_stmt (*gsi);
570 tree fndecl = gimple_call_fndecl (stmt);
571 gcc_checking_assert (fndecl);
572 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
573 return false;
574
575 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
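  /* Calls querying the current thread, nesting level or parallel state can be
     replaced by a constant zero in a gridified kernel; do so and remove the
     call.  */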
576 if ((strcmp (name, "omp_get_thread_num") == 0)
577 || (strcmp (name, "omp_get_level") == 0)
578 || (strcmp (name, "omp_get_active_level") == 0)
579 || (strcmp (name, "omp_in_parallel") == 0))
580 {
581 tree lhs = gimple_call_lhs (stmt);
582 if (lhs)
583 {
584 gassign *assign
585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
586 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
587 }
588 gsi_remove (gsi, true);
589 return true;
590 }
591
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
595 || (strcmp (name, "omp_get_num_teams") == 0)
596 || (strcmp (name, "omp_get_team_num") == 0));
597 return false;
598 }
599
600 /* Given a sequence of statements within a distribute omp construct or a
601 parallel construct, which in the original source does not form a compound
602 construct with a looping construct, return true if it does not prevent us
603 from turning it into a gridified HSA kernel. Otherwise return false. GRID
604 describes hitherto discovered properties of the loop that is evaluated for
605    possible gridification. IN_PARALLEL must be true if SEQ is within a
606    parallel construct and false if it is only within a distribute
607 construct. */
608
609 static bool
610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
611 bool in_parallel)
612 {
613 gimple_stmt_iterator gsi;
614 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
615 {
616 gimple *stmt = gsi_stmt (gsi);
617
618 if (grid_safe_assignment_p (stmt, grid)
619 || gimple_code (stmt) == GIMPLE_GOTO
620 || gimple_code (stmt) == GIMPLE_LABEL
621 || gimple_code (stmt) == GIMPLE_COND)
622 continue;
623 else if (gbind *bind = dyn_cast <gbind *> (stmt))
624 {
625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
626 grid, in_parallel))
627 return false;
628 continue;
629 }
630 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
631 {
632 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
633 {
634 if (dump_enabled_p ())
635 {
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
637 GRID_MISSED_MSG_PREFIX "the distribute "
638 "construct contains a try..catch region\n");
639 dump_printf_loc (MSG_NOTE, try_stmt,
640 "This statement cannot be analyzed for "
641 "tiled gridification\n");
642 }
643 return false;
644 }
645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
646 grid, in_parallel))
647 return false;
648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
649 grid, in_parallel))
650 return false;
651 continue;
652 }
653 else if (is_gimple_call (stmt))
654 {
655 tree fndecl = gimple_call_fndecl (stmt);
656 if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
657 continue;
658
659 if (dump_enabled_p ())
660 {
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
662 GRID_MISSED_MSG_PREFIX "the distribute "
663 "construct contains a call\n");
664 dump_printf_loc (MSG_NOTE, stmt,
665 "This statement cannot be analyzed for "
666 "tiled gridification\n");
667 }
668 return false;
669 }
670 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
671 {
672 if (in_parallel)
673 {
674 if (dump_enabled_p ())
675 {
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
677 GRID_MISSED_MSG_PREFIX "a parallel "
678 "construct contains another parallel "
679 "construct\n");
680 dump_printf_loc (MSG_NOTE, stmt,
681 "This parallel construct is nested in "
682 "another one\n");
683 }
684 return false;
685 }
686 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
688 grid, true))
689 return false;
690 }
691 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
692 {
693 if (!in_parallel)
694 {
695 if (dump_enabled_p ())
696 {
697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
698 GRID_MISSED_MSG_PREFIX "a loop "
699 "construct is not nested within a parallel "
700 "construct\n");
701 dump_printf_loc (MSG_NOTE, stmt,
702 "This loop construct is not nested in "
703 "a parallel construct\n");
704 }
705 return false;
706 }
707 if (!grid_gfor_follows_tiling_pattern (gfor, grid))
708 return false;
709 }
710 else
711 {
712 if (dump_enabled_p ())
713 {
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
715 GRID_MISSED_MSG_PREFIX "the distribute "
716 "construct contains a complex statement\n");
717 dump_printf_loc (MSG_NOTE, stmt,
718 "This statement cannot be analyzed for "
719 "tiled gridification\n");
720 }
721 return false;
722 }
723 }
724 return true;
725 }
726
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
730
731 static bool
732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
733 {
734 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
735 return false;
736
737 dump_user_location_t tloc = target;
738 grid->target_loc = tloc;
739 gimple *stmt
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
741 grid, "target");
742 if (!stmt)
743 return false;
744 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
745 tree group_size = NULL;
746 if (!teams)
747 {
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
750 GRID_MISSED_MSG_PREFIX "it does not have a sole "
751 "teams construct in it.\n");
752 return false;
753 }
754
755 tree clauses = gimple_omp_teams_clauses (teams);
756 while (clauses)
757 {
758 switch (OMP_CLAUSE_CODE (clauses))
759 {
760 case OMP_CLAUSE_NUM_TEAMS:
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
763 GRID_MISSED_MSG_PREFIX "the teams construct "
764 "contains a num_teams clause\n ");
765 return false;
766
767 case OMP_CLAUSE_REDUCTION:
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
770 GRID_MISSED_MSG_PREFIX "a reduction "
771 "clause is present\n ");
772 return false;
773
774 case OMP_CLAUSE_THREAD_LIMIT:
775 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
776 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
777 break;
778
779 default:
780 break;
781 }
782 clauses = OMP_CLAUSE_CHAIN (clauses);
783 }
784
785 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
786 "teams");
787 if (!stmt)
788 return false;
789 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
790 if (!dist)
791 {
792 if (dump_enabled_p ())
793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
794 GRID_MISSED_MSG_PREFIX "the teams construct does not "
795 "have a single distribute construct in it.\n");
796 return false;
797 }
798
799 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
800
801 grid->collapse = gimple_omp_for_collapse (dist);
802 if (grid->collapse > 3)
803 {
804 if (dump_enabled_p ())
805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
806 GRID_MISSED_MSG_PREFIX "the distribute construct "
807 "contains collapse clause with parameter greater "
808 "than 3\n");
809 return false;
810 }
811
812 struct omp_for_data fd;
813 struct omp_for_data_loop *dist_loops
814 = (struct omp_for_data_loop *)alloca (grid->collapse
815 * sizeof (struct omp_for_data_loop));
816 omp_extract_for_data (dist, &fd, dist_loops);
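  /* A dist_schedule chunk size, if present, must agree with any thread_limit
     clause and then acts as the requested group size.  */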
817 if (fd.chunk_size)
818 {
819 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
820 {
821 if (dump_enabled_p ())
822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
823 GRID_MISSED_MSG_PREFIX "the teams "
824 "thread limit is different from distribute "
825 "schedule chunk\n");
826 return false;
827 }
828 group_size = fd.chunk_size;
829 }
830 if (group_size && grid->collapse > 1)
831 {
832 if (dump_enabled_p ())
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
834 GRID_MISSED_MSG_PREFIX "group size cannot be "
835 "set using thread_limit or schedule clauses "
836 "when also using a collapse clause greater than 1\n");
837 return false;
838 }
839
840 if (gimple_omp_for_combined_p (dist))
841 {
842 grid->tiling = false;
843 grid->group_sizes[0] = group_size;
844 for (unsigned i = 1; i < grid->collapse; i++)
845 grid->group_sizes[i] = NULL;
846 return grid_dist_follows_simple_pattern (dist, grid);
847 }
848 else
849 {
850 grid->tiling = true;
851 if (group_size)
852 {
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
855 GRID_MISSED_MSG_PREFIX "group size cannot be set "
856 "using thread_limit or schedule clauses when "
857 "distribute and loop constructs do not form "
858 "one combined construct\n");
859 return false;
860 }
861 for (unsigned i = 0; i < grid->collapse; i++)
862 {
863 if (fd.loops[i].cond_code == GT_EXPR)
864 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
865 TREE_TYPE (fd.loops[i].step),
866 fd.loops[i].step);
867 else
868 grid->group_sizes[i] = fd.loops[i].step;
869 }
870 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
871 false);
872 }
873 }
874
875 /* Operand walker, used to remap pre-body declarations according to a hash map
876 provided in DATA. */
877
878 static tree
879 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
880 {
881 tree t = *tp;
882
883 if (DECL_P (t) || TYPE_P (t))
884 *walk_subtrees = 0;
885 else
886 *walk_subtrees = 1;
887
888 if (VAR_P (t))
889 {
890 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
891 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
892 tree *repl = declmap->get (t);
893 if (repl)
894 *tp = *repl;
895 }
896 return NULL_TREE;
897 }
898
899 /* Identifiers of segments into which a particular variable should be placed
900 when gridifying. */
901
902 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
903 GRID_SEGMENT_GLOBAL};
904
905 /* Mark VAR so that it is eventually placed into SEGMENT by attaching the
906    appropriate attribute to its declaration and, if necessary, making the
907    variable static and finalizing its declaration. */
908
909 static void
910 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
911 {
912 /* Placing a non-addressable variable into a segment would require that we
913    re-gimplify all its uses. Fortunately, we do not have to do this because
914    if a variable is not addressable, it is not used in atomic or parallel
915    statements and so the relaxed GPU consistency rules mean we can just keep
916    it private. */
917 if (!TREE_ADDRESSABLE (var))
918 return;
919
920 switch (segment)
921 {
922 case GRID_SEGMENT_GROUP:
923 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
924 NULL, DECL_ATTRIBUTES (var));
925 break;
926 case GRID_SEGMENT_GLOBAL:
927 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
928 NULL, DECL_ATTRIBUTES (var));
929 break;
930 default:
931 gcc_unreachable ();
932 }
933
934 if (!TREE_STATIC (var))
935 {
936 TREE_STATIC (var) = 1;
937 const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
938 SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
939 varpool_node::finalize_decl (var);
940 }
941
942 }
943
944 /* Copy leading register-type assignments to local variables in SRC to just
945    before DST, creating temporaries, adjusting the mapping of operands in WI and
946 remapping operands as necessary. Add any new temporaries to TGT_BIND.
947 Return the first statement that does not conform to grid_safe_assignment_p
948 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
949 variables in traversed bind statements so that they are put into the
950 appropriate segment. */
951
952 static gimple *
953 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
954 gbind *tgt_bind,
955 enum grid_var_segment var_segment,
956 struct walk_stmt_info *wi)
957 {
958 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
959 gimple_stmt_iterator gsi;
960 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
961 {
962 gimple *stmt = gsi_stmt (gsi);
963 if (gbind *bind = dyn_cast <gbind *> (stmt))
964 {
965 gimple *r = grid_copy_leading_local_assignments
966 (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
967
968 if (var_segment != GRID_SEGMENT_PRIVATE)
969 for (tree var = gimple_bind_vars (bind);
970 var;
971 var = DECL_CHAIN (var))
972 grid_mark_variable_segment (var, var_segment);
973 if (r)
974 return r;
975 else
976 continue;
977 }
978 if (!grid_safe_assignment_p (stmt, NULL))
979 return stmt;
980 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
981 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
982 TREE_TYPE (lhs));
983 DECL_CONTEXT (repl) = current_function_decl;
984 gimple_bind_append_vars (tgt_bind, repl);
985
986 declmap->put (lhs, repl);
987 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
988 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
989 gsi_insert_before (dst, copy, GSI_SAME_STMT);
990 }
991 return NULL;
992 }
993
994 /* Statement walker function to make adjustments to statements within the
995    gridified kernel copy. */
996
997 static tree
998 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
999 struct walk_stmt_info *)
1000 {
1001 *handled_ops_p = false;
1002 gimple *stmt = gsi_stmt (*gsi);
1003 if (gimple_code (stmt) == GIMPLE_OMP_FOR
1004 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1005 {
1006 gomp_for *loop = as_a <gomp_for *> (stmt);
1007 tree clauses = gimple_omp_for_clauses (loop);
1008 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1009 if (cl)
1010 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1011 else
1012 {
1013 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1014 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1015 OMP_CLAUSE_CHAIN (c) = clauses;
1016 gimple_omp_for_set_clauses (loop, c);
1017 }
1018 }
1019 return NULL_TREE;
1020 }
1021
1022 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1023 combined construct with a simd loop, eliminate the simd loop. */
1024
1025 static void
1026 grid_eliminate_combined_simd_part (gomp_for *parloop)
1027 {
1028 struct walk_stmt_info wi;
1029
1030 memset (&wi, 0, sizeof (wi));
1031 wi.val_only = true;
1032 enum gf_mask msk = GF_OMP_FOR_SIMD;
1033 wi.info = (void *) &msk;
1034 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1035 gimple *stmt = (gimple *) wi.info;
1036 /* We expect that the SIMD loop is the only statement in the parallel loop. */
1037 gcc_assert (stmt
1038 && gimple_code (stmt) == GIMPLE_OMP_FOR
1039 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1040 && gimple_omp_for_combined_into_p (stmt)
1041 && !gimple_omp_for_combined_p (stmt));
1042 gomp_for *simd = as_a <gomp_for *> (stmt);
1043
1044 /* Copy over the iteration properties because the body refers to the index in
1045    the bottom-most loop. */
1046 unsigned i, collapse = gimple_omp_for_collapse (parloop);
1047 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1048 for (i = 0; i < collapse; i++)
1049 {
1050 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1051 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1052 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1053 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1054 }
1055
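  /* Find the end of the clause chain of the parallel loop so that clauses
     taken over from the simd construct can be appended to it.  */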
1056 tree *tgt = gimple_omp_for_clauses_ptr (parloop);
1057 while (*tgt)
1058 tgt = &OMP_CLAUSE_CHAIN (*tgt);
1059
1060 /* Copy over all clauses, except for linear clauses, which are turned into
1061 private clauses, and all other simd-specific clauses, which are
1062 ignored. */
1063 tree *pc = gimple_omp_for_clauses_ptr (simd);
1064 while (*pc)
1065 {
1066 tree c = *pc;
1067 switch (TREE_CODE (c))
1068 {
1069 case OMP_CLAUSE_LINEAR:
1070 {
1071 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1072 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1073 OMP_CLAUSE_CHAIN (priv) = NULL;
1074 *tgt = priv;
1075 tgt = &OMP_CLAUSE_CHAIN (priv);
1076 pc = &OMP_CLAUSE_CHAIN (c);
1077 break;
1078 }
1079
1080 case OMP_CLAUSE_SAFELEN:
1081 case OMP_CLAUSE_SIMDLEN:
1082 case OMP_CLAUSE_ALIGNED:
1083 pc = &OMP_CLAUSE_CHAIN (c);
1084 break;
1085
1086 default:
1087 *pc = OMP_CLAUSE_CHAIN (c);
1088 OMP_CLAUSE_CHAIN (c) = NULL;
1089 *tgt = c;
1090 tgt = &OMP_CLAUSE_CHAIN (c);
1091 break;
1092 }
1093 }
1094
1095 /* Finally, throw away the simd and mark the parallel loop as not
1096 combined. */
1097 gimple_omp_set_body (parloop, gimple_omp_body (simd));
1098 gimple_omp_for_set_combined_p (parloop, false);
1099 }
1100
1101 /* Statement walker function marking loops as grid loops representing threads
1102    of a particular thread group and handling their lastprivate clauses. */
1103
1104 static tree
1105 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1106 struct walk_stmt_info *wi_in)
1107 {
1108 *handled_ops_p = false;
1109 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
1110 {
1111 *handled_ops_p = true;
1112 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
1113 gimple_omp_for_set_grid_intra_group (loop, true);
1114 if (gimple_omp_for_combined_p (loop))
1115 grid_eliminate_combined_simd_part (loop);
1116
1117 struct walk_stmt_info body_wi;
1118 memset (&body_wi, 0, sizeof (body_wi));
1119 walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
1120 grid_process_grid_body, NULL, &body_wi);
1121
1122 gbind *bind = (gbind *) wi_in->info;
1123 tree c;
1124 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
1125 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
1126 {
1127 push_gimplify_context ();
1128 tree ov = OMP_CLAUSE_DECL (c);
1129 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
1130 TREE_TYPE (ov));
1131
1132 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
1133 DECL_CONTEXT (gv) = current_function_decl;
1134 gimple_bind_append_vars (bind, gv);
1135 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
1136 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
1137 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
1138 gimple_seq l = NULL;
1139 gimplify_and_add (x, &l);
1140 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
1141 pop_gimplify_context (bind);
1142 }
1143 }
1144 return NULL_TREE;
1145 }
1146
1147 /* Statement walker function marking all parallels as grid_phony and loops as
1148 grid ones representing threads of a particular thread group. */
1149
1150 static tree
1151 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1152 bool *handled_ops_p,
1153 struct walk_stmt_info *wi_in)
1154 {
1155 *handled_ops_p = false;
1156 wi_in->removed_stmt = false;
1157 gimple *stmt = gsi_stmt (*gsi);
1158 if (gbind *bind = dyn_cast <gbind *> (stmt))
1159 {
1160 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1161 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1162 }
1163 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1164 {
1165 *handled_ops_p = true;
1166 gimple_omp_parallel_set_grid_phony (parallel, true);
1167
1168 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1169 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1170 gimple_seq s = NULL;
1171 gimple_seq_add_stmt (&s, new_bind);
1172 gimple_omp_set_body (parallel, s);
1173
1174 struct walk_stmt_info wi_par;
1175 memset (&wi_par, 0, sizeof (wi_par));
1176 wi_par.info = new_bind;
1177 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1178 grid_mark_tiling_loops, NULL, &wi_par);
1179 }
1180 else if (is_a <gcall *> (stmt))
1181 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1182 return NULL_TREE;
1183 }
1184
1185 /* Given freshly copied top level kernel SEQ, identify the individual OMP
1186    components, mark them as part of the kernel, copy assignments leading to
1187    them to just before DST, remapping them using WI and adding new temporaries
1188    to TGT_BIND, and return the loop that will be used for kernel dispatch. */
1189
1190 static gomp_for *
1191 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
1192 gimple_stmt_iterator *dst,
1193 gbind *tgt_bind, struct walk_stmt_info *wi)
1194 {
1195 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
1196 GRID_SEGMENT_GLOBAL, wi);
1197 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
1198 gcc_assert (teams);
1199 gimple_omp_teams_set_grid_phony (teams, true);
1200 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
1201 tgt_bind, GRID_SEGMENT_GLOBAL,
1202 wi);
1203 gcc_checking_assert (stmt);
1204 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
1205 gcc_assert (dist);
1206 gimple_seq prebody = gimple_omp_for_pre_body (dist);
1207 if (prebody)
1208 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1209 GRID_SEGMENT_GROUP, wi);
1210
1211 if (grid->tiling)
1212 {
1213 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
1214 gimple_omp_for_set_grid_group_iter (dist, true);
1215
1216 struct walk_stmt_info wi_tiled;
1217 memset (&wi_tiled, 0, sizeof (wi_tiled));
1218 walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
1219 grid_mark_tiling_parallels_and_loops, NULL,
1220 &wi_tiled);
1221 return dist;
1222 }
1223 else
1224 {
1225 gimple_omp_for_set_grid_phony (dist, true);
1226 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
1227 tgt_bind,
1228 GRID_SEGMENT_PRIVATE, wi);
1229 gcc_checking_assert (stmt);
1230 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
1231 gimple_omp_parallel_set_grid_phony (parallel, true);
1232 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
1233 dst, tgt_bind,
1234 GRID_SEGMENT_PRIVATE, wi);
1235 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
1236 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
1237 prebody = gimple_omp_for_pre_body (inner_loop);
1238 if (prebody)
1239 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1240 GRID_SEGMENT_PRIVATE, wi);
1241
1242 if (gimple_omp_for_combined_p (inner_loop))
1243 grid_eliminate_combined_simd_part (inner_loop);
1244 struct walk_stmt_info body_wi;
1245 memset (&body_wi, 0, sizeof (body_wi));
1246 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
1247 grid_process_grid_body, NULL, &body_wi);
1248
1249 return inner_loop;
1250 }
1251 }
1252
1253 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1254 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
1255 is the bind into which temporaries inserted before TARGET should be
1256 added. */
1257
1258 static void
1259 grid_attempt_target_gridification (gomp_target *target,
1260 gimple_stmt_iterator *gsi,
1261 gbind *tgt_bind)
1262 {
1263 /* GRID starts out zero-initialized and is filled in by the pattern check. */
1264 grid_prop grid = {};
1265 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
1266 return;
1267
1268 location_t loc = gimple_location (target);
1269 if (dump_enabled_p ())
1270 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
1271 "Target construct will be turned into a gridified HSA "
1272 "kernel\n");
1273
1274 /* Copy target body to a GPUKERNEL construct: */
1275 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
1276 (gimple_omp_body (target));
1277
1278 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
1279 struct walk_stmt_info wi;
1280 memset (&wi, 0, sizeof (struct walk_stmt_info));
1281 wi.info = declmap;
1282
1283 /* Copy assignments in between OMP statements before target, mark OMP
1284 statements within copy appropriately. */
1285 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
1286 tgt_bind, &wi);
1287
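  /* Splice the block of the kernel copy into the block tree next to the
     original target body and append the new grid body statement to the
     target's bind.  */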
1288 gbind *old_bind
1289 = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
1290 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
1291 tree new_block = gimple_bind_block (new_bind);
1292 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
1293 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
1294 BLOCK_SUBBLOCKS (enc_block) = new_block;
1295 BLOCK_SUPERCONTEXT (new_block) = enc_block;
1296 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
1297 gimple_seq_add_stmt
1298 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
1299 gpukernel);
1300
1301 for (size_t i = 0; i < grid.collapse; i++)
1302 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
1303 push_gimplify_context ();
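  /* For each collapsed dimension, compute the iteration count of the kernel
     dispatch loop (scaled by the step when tiling, since the dispatch loop
     then counts tiles rather than individual work-items) and record it,
     together with the requested group size, in an artificial _griddim_ clause
     on the target construct.  */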
1304 for (size_t i = 0; i < grid.collapse; i++)
1305 {
1306 tree index_var = gimple_omp_for_index (inner_loop, i);
1307 tree itype, type = TREE_TYPE (index_var);
1308 if (POINTER_TYPE_P (type))
1309 itype = signed_type_for (type);
1310 else
1311 itype = type;
1312
1313 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
1314 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
1315 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
1316 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
1317 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
1318 tree step
1319 = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
1320 omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
1321 n1 = fold_convert (itype, n1);
1322 n2 = fold_convert (itype, n2);
1323
1324 tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
1325
1326 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
1327 t = fold_build2 (PLUS_EXPR, itype, step, t);
1328 t = fold_build2 (PLUS_EXPR, itype, t, n2);
1329 t = fold_build2 (MINUS_EXPR, itype, t, n1);
1330 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
1331 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1332 fold_build1 (NEGATE_EXPR, itype, t),
1333 fold_build1 (NEGATE_EXPR, itype, step));
1334 else
1335 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
1336 t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
1337 if (grid.tiling)
1338 {
1339 if (cond_code == GT_EXPR)
1340 step = fold_build1 (NEGATE_EXPR, itype, step);
1341 t = fold_build2 (MULT_EXPR, itype, t, step);
1342 }
1343
1344 tree gs = fold_convert (uint32_type_node, t);
1345 gimple_seq tmpseq = NULL;
1346 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1347 if (!gimple_seq_empty_p (tmpseq))
1348 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1349
1350 tree ws;
1351 if (grid.group_sizes[i])
1352 {
1353 ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
1354 tmpseq = NULL;
1355 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1356 if (!gimple_seq_empty_p (tmpseq))
1357 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1358 }
1359 else
1360 ws = build_zero_cst (uint32_type_node);
1361
1362 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
1363 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
1364 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
1365 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
1366 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
1367 gimple_omp_target_set_clauses (target, c);
1368 }
1369 pop_gimplify_context (tgt_bind);
1370 delete declmap;
1371 return;
1372 }
1373
1374 /* Walker function doing all the work for create_target_kernels. */
1375
1376 static tree
1377 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1378 bool *handled_ops_p,
1379 struct walk_stmt_info *incoming)
1380 {
1381 *handled_ops_p = false;
1382
1383 gimple *stmt = gsi_stmt (*gsi);
1384 gomp_target *target = dyn_cast <gomp_target *> (stmt);
1385 if (target)
1386 {
1387 gbind *tgt_bind = (gbind *) incoming->info;
1388 gcc_checking_assert (tgt_bind);
1389 grid_attempt_target_gridification (target, gsi, tgt_bind);
1390 return NULL_TREE;
1391 }
1392 gbind *bind = dyn_cast <gbind *> (stmt);
1393 if (bind)
1394 {
1395 *handled_ops_p = true;
1396 struct walk_stmt_info wi;
1397 memset (&wi, 0, sizeof (wi));
1398 wi.info = bind;
1399 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1400 grid_gridify_all_targets_stmt, NULL, &wi);
1401 }
1402 return NULL_TREE;
1403 }
1404
1405 /* Attempt to gridify all target constructs in BODY_P. All such targets will
1406 have their bodies duplicated, with the new copy being put into a
1407    gimple_omp_grid_body statement. All kernel-related constructs within the
1408 grid_body will be marked with phony flags or kernel kinds. Moreover, some
1409 re-structuring is often needed, such as copying pre-bodies before the target
1410 construct so that kernel grid sizes can be computed. */
1411
1412 void
1413 omp_grid_gridify_all_targets (gimple_seq *body_p)
1414 {
1415 struct walk_stmt_info wi;
1416 memset (&wi, 0, sizeof (wi));
1417 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
1418 }