/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}
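
/* As an illustrative sketch only (using the user-level names of the
   BUILT_IN_HSA_* builtins, assumed here for illustration), for a loop
   gridified across the whole grid with collapse(2) the predicate built above
   corresponds to

     __builtin_hsa_workitemabsid (0) + 1 == __builtin_hsa_gridsize (0)
     && __builtin_hsa_workitemabsid (1) + 1 == __builtin_hsa_gridsize (1)

   i.e. it holds only for the work-item executing the last iteration in every
   collapsed dimension, which is the one that must perform the lastprivate
   assignment.  */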

/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

class grid_prop
{
public:
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  */
  tree group_sizes[3];
};

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "

/* Return true if STMT is an assignment of a register-type value into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */

static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
        return false;
  return true;
}
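
/* For instance (illustrative only), an assignment such as "D.1234 = n_23 * 4;"
   into a local scalar temporary satisfies the predicate above, whereas a store
   into a global variable, into an aggregate, or into one of the recorded
   group-size trees does not.  */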

/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */

static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}

/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement are encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
                                          const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
        continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple_seq bind_body = gimple_bind_body (bind);
          if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
                                                         ret))
            return false;
        }
      else if (is_gimple_omp (stmt))
        {
          if (*ret)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "%s construct "
                                   "contains multiple OpenMP constructs\n",
                                   name);
                  dump_printf_loc (MSG_NOTE, *ret,
                                   "The first OpenMP construct within "
                                   "a parallel\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "The second OpenMP construct within "
                                   "a parallel\n");
                }
              return false;
            }
          *ret = stmt;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "%s construct contains "
                               "a complex statement\n", name);
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
                                        const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
                         name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct does not contain"
                         " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}

/* Walker function looking for statements that there is no point gridifying
   (and for noreturn function calls which we cannot handle).  Return non-NULL
   if such a statement is found.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
                                   bool *handled_ops_p,
                                   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
        {
          *handled_ops_p = true;
          wi->info = stmt;
          return error_mark_node;
        }
      break;

      /* We may reduce the following list if we find a way to implement the
         clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}

/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostic and return false,
   otherwise return true.  TLOC is the dump location of the target construct
   that is evaluated for possible gridification.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_THREADS:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "there is "
                               "a num_threads clause of the parallel "
                               "construct\n");
              dump_printf_loc (MSG_NOTE, par,
                               "Parallel construct has a num_threads clause\n");
            }
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "a reduction clause "
                               "is present\n ");
              dump_printf_loc (MSG_NOTE, par,
                               "Parallel construct has a reduction clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}

/* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
                                                 grid))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "the inner loop "
                           "bounds computation contains a complex "
                           "statement\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_SCHEDULE:
          if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the inner loop "
                                   "has a non-automatic schedule clause\n");
                  dump_printf_loc (MSG_NOTE, gfor,
                                   "Loop construct has a non-automatic "
                                   "schedule clause\n");
                }
              return false;
            }
          break;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "a reduction "
                               "clause is present\n ");
              dump_printf_loc (MSG_NOTE, gfor,
                               "Loop construct has a reduction "
                               "clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
                       grid_find_ungridifiable_statement,
                       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
        {
          if (is_gimple_call (bad))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "a call to a noreturn function\n");
          else
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "statement %s which cannot be transformed\n",
                             gimple_code_name[(int) gimple_code (bad)]);
          dump_printf_loc (MSG_NOTE, bad,
                           "This statement cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
                                                         grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
                                                 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the inner loop is not "
                         "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
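
/* For illustration only, the "simple" shape accepted above roughly corresponds
   to user code such as (clauses and collapse may vary):

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for
     for (int i = 0; i < n; i++)
       ...

   i.e. distribute and the work-sharing loop form one combined construct and
   the only statements around them are simple local assignments.  */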

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop is not "
                           "a simple for loop\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "This statement is not a simple for loop\n");
        }
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop does not "
                           "use the same collapse clause\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct uses a different collapse clause\n");
        }
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
                              (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);

      if (!operand_equal_p (grid->group_sizes[i], t, 0))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute and "
                               "an internal loop do not agree on tile size\n");
              dump_printf_loc (MSG_NOTE, gfor,
                               "Loop construct does not seem to loop over "
                               "a tile size\n");
            }
          return false;
        }
    }
  return true;
}

/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */

static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}

/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */

static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (stmt);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
        {
          gassign *assign
            = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
          gsi_insert_before (gsi, assign, GSI_SAME_STMT);
        }
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, the HSA back-end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
                       || (strcmp (name, "omp_get_num_teams") == 0)
                       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}
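
/* As an illustrative example, a statement like "tid = omp_get_thread_num ();"
   appearing directly in the distribute part of a tiled construct is replaced
   by "tid = 0;" above, because such code executes exactly once per work-group,
   whereas calls such as omp_get_num_teams are left in place for the HSA
   back-end to expand.  */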

/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
                                  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
          || gimple_code (stmt) == GIMPLE_GOTO
          || gimple_code (stmt) == GIMPLE_LABEL
          || gimple_code (stmt) == GIMPLE_COND)
        continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
        {
          if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the distribute "
                                   "construct contains a try..catch region\n");
                  dump_printf_loc (MSG_NOTE, try_stmt,
                                   "This statement cannot be analyzed for "
                                   "tiled gridification\n");
                }
              return false;
            }
          if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
                                                 grid, in_parallel))
            return false;
          if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (is_gimple_call (stmt))
        {
          tree fndecl = gimple_call_fndecl (stmt);
          if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
            continue;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a call\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
        {
          if (in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a parallel "
                                   "construct contains another parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This parallel construct is nested in "
                                   "another one\n");
                }
              return false;
            }
          if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
              || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
                                                    grid, true))
            return false;
        }
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
        {
          if (!in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a loop "
                                   "construct is not nested within a parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This loop construct is not nested in "
                                   "a parallel construct\n");
                }
              return false;
            }
          if (!grid_gfor_follows_tiling_pattern (gfor, grid))
            return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a complex statement\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
    }
  return true;
}
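
/* For illustration only, the tiling shape checked above roughly corresponds to
   user code such as (with TILE matching the distribute step):

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute
     for (int ib = 0; ib < n; ib += TILE)
       #pragma omp parallel
       #pragma omp for
       for (int i = ib; i < ib + TILE; i++)
         ...

   where each inner work-sharing loop iterates over exactly one tile, i.e. its
   iteration count equals the step of the corresponding distribute loop.  */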

/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
                                              grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "it does not have a sole "
                         "teams construct in it.\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_TEAMS:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams construct "
                             "contains a num_teams clause\n ");
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "a reduction "
                             "clause is present\n ");
          return false;

        case OMP_CLAUSE_THREAD_LIMIT:
          if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
            group_size = OMP_CLAUSE_OPERAND (clauses, 0);
          break;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
                                                 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the teams construct does not "
                         "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the distribute construct "
                         "contains a collapse clause with a parameter greater "
                         "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams "
                             "thread limit is different from the distribute "
                             "schedule chunk\n");
          return false;
        }
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "group size cannot be "
                         "set using thread_limit or schedule clauses "
                         "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
        grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}
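
/* To summarize the group size logic above in an illustrative way: for the
   combined (non-tiling) shape the requested work-group size, if any, comes
   from the teams thread_limit clause or the dist_schedule chunk (which must
   agree), and only with collapse(1); for the tiling shape no explicit group
   size is allowed and each dimension's group size is taken to be the absolute
   value of the corresponding distribute loop step.  */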

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
        *tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
                       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making variables non-addressable would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
      SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
      varpool_node::finalize_decl (var);
    }
}

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting the mapping of operands in WI
   and remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
                                     gbind *tgt_bind,
                                     enum grid_var_segment var_segment,
                                     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple *r = grid_copy_leading_local_assignments
            (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

          if (var_segment != GRID_SEGMENT_PRIVATE)
            for (tree var = gimple_bind_vars (bind);
                 var;
                 var = DECL_CHAIN (var))
              grid_mark_variable_segment (var, var_segment);
          if (r)
            return r;
          else
            continue;
        }
      if (!grid_safe_assignment_p (stmt, NULL))
        return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
                                 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
        OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
        {
          tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
          OMP_CLAUSE_CHAIN (c) = clauses;
          gimple_omp_for_set_clauses (loop, c);
        }
    }
  return NULL_TREE;
}

/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
  gcc_assert (stmt
              && gimple_code (stmt) == GIMPLE_OMP_FOR
              && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
              && gimple_omp_for_combined_into_p (stmt)
              && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (OMP_CLAUSE_CODE (c))
        {
        case OMP_CLAUSE_LINEAR:
          {
            tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
            OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
            OMP_CLAUSE_CHAIN (priv) = NULL;
            *tgt = priv;
            tgt = &OMP_CLAUSE_CHAIN (priv);
            pc = &OMP_CLAUSE_CHAIN (c);
            break;
          }

        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_ALIGNED:
          pc = &OMP_CLAUSE_CHAIN (c);
          break;

        default:
          *pc = OMP_CLAUSE_CHAIN (c);
          OMP_CLAUSE_CHAIN (c) = NULL;
          *tgt = c;
          tgt = &OMP_CLAUSE_CHAIN (c);
          break;
        }
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}
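
/* As an illustrative example, when the work-sharing loop was written as a
   combined "#pragma omp parallel for simd", the simd member of the combination
   is dropped here: the parallel loop takes over the simd loop's indices and
   bounds, linear clauses reappear as private clauses, and safelen, simdlen and
   aligned clauses are simply discarded.  */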

/* Statement walker function marking loops as grid ones representing threads of
   a particular thread group.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
        grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
                           grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
          {
            push_gimplify_context ();
            tree ov = OMP_CLAUSE_DECL (c);
            tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
                                     TREE_TYPE (ov));

            grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
            DECL_CONTEXT (gv) = current_function_decl;
            gimple_bind_append_vars (bind, gv);
            tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
            gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
            x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
            gimple_seq l = NULL;
            gimplify_and_add (x, &l);
            gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
            pop_gimplify_context (bind);
          }
    }
  return NULL_TREE;
}

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
                                      bool *handled_ops_p,
                                      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
        grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
                           grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}

/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of the kernel, copy assignments leading to
   them just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
                               gimple_stmt_iterator *dst,
                               gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
                                                      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
                                              tgt_bind, GRID_SEGMENT_GLOBAL,
                                              wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                         GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
                           grid_mark_tiling_parallels_and_loops, NULL,
                           &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
                                                  tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
                                                  dst, tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
        grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                             GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
        grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
                           grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}

/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
                                   gimple_stmt_iterator *gsi,
                                   gbind *tgt_bind)
{
  /* removed group_size */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
                     "Target construct will be turned into a gridified HSA "
                     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
                                                        tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree index_var = gimple_omp_for_index (inner_loop, i);
      tree itype, type = TREE_TYPE (index_var);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      tree step
        = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
      omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
        {
          if (cond_code == GT_EXPR)
            step = fold_build1 (NEGATE_EXPR, itype, step);
          t = fold_build2 (MULT_EXPR, itype, t, step);
        }

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
        {
          ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
          tmpseq = NULL;
          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
          if (!gimple_seq_empty_p (tmpseq))
            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
        }
      else
        ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}
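
/* To illustrate the grid size computation above with a concrete, hypothetical
   case: for an inner loop of the form "for (i = 0; i < n; i++)" the iteration
   count computed above is (1 + (-1) + n - 0) / 1 = n (or 0 if the loop runs
   zero times), so the _griddim_ clause for that dimension requests a grid of
   n work-items; in the tiling case this count is further multiplied by the
   (absolute) step so that whole tiles are covered.  */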

/* Walker function doing all the work for omp_grid_gridify_all_targets.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
                               bool *handled_ops_p,
                               struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
                           grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}

/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}