/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"
42 /* Return the lastprivate predicate for a given gridified loop described by
46 omp_grid_lastprivate_predicate (struct omp_for_data
*fd
)
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49 iteration variables but they are not actually captured in this fd.
50 Fortunately, we can easily rely on HSA builtins to get this
54 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd
->for_stmt
))
57 id
= builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID
);
58 size
= builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE
);
62 id
= builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID
);
63 size
= builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE
);
66 for (int dim
= 0; dim
< fd
->collapse
; dim
++)
68 tree dim_tree
= build_int_cstu (unsigned_type_node
, dim
);
69 tree u1
= build_int_cstu (unsigned_type_node
, 1);
71 = build2 (EQ_EXPR
, boolean_type_node
,
72 build2 (PLUS_EXPR
, unsigned_type_node
,
73 build_call_expr (id
, 1, dim_tree
), u1
),
74 build_call_expr (size
, 1, dim_tree
));
76 cond
= build2 (TRUTH_AND_EXPR
, boolean_type_node
, cond
, c2
);
83 /* Structure describing the basic properties of the loop we ara analyzing
84 whether it can be gridified and when it is gridified. */
88 /* True when we are doing tiling gridification, i.e. when there is a distinct
89 distribute loop over groups and a loop construct over work-items. False
90 when distribute and parallel for loops form a combined construct. */
92 /* Location of the target construct for optimization information
94 dump_user_location_t target_loc
;
95 /* The collapse clause of the involved loops. Collapse value of all of them
96 must be the same for gridification to take place. */
98 /* Group sizes, if requested by the user or NULL if not requested. */
102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103 "gridified HSA kernel because "
105 /* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
110 grid_safe_assignment_p (gimple
*stmt
, grid_prop
*grid
)
112 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
115 if (gimple_clobber_p (assign
))
117 tree lhs
= gimple_assign_lhs (assign
);
119 || !is_gimple_reg_type (TREE_TYPE (lhs
))
120 || is_global_var (lhs
))
123 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
124 if (lhs
== grid
->group_sizes
[i
])
129 /* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
133 grid_seq_only_contains_local_assignments (gimple_seq seq
, grid_prop
*grid
)
138 gimple_stmt_iterator gsi
;
139 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi
), grid
))
145 /* Scan statements in SEQ and call itself recursively on any bind. GRID
146 describes hitherto discovered properties of the loop that is evaluated for
147 possible gridification. If during whole search only assignments to
148 register-type local variables (that do not overwrite group size information)
149 and one single OMP statement is encountered, return true, otherwise return
150 false. RET is where we store any OMP statement encountered. */
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq
, grid_prop
*grid
,
154 const char *name
, gimple
**ret
)
156 gimple_stmt_iterator gsi
;
157 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
159 gimple
*stmt
= gsi_stmt (gsi
);
161 if (grid_safe_assignment_p (stmt
, grid
))
163 if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
165 gimple_seq bind_body
= gimple_bind_body (bind
);
166 if (!grid_find_single_omp_among_assignments_1 (bind_body
, grid
, name
,
170 else if (is_gimple_omp (stmt
))
174 if (dump_enabled_p ())
176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
177 GRID_MISSED_MSG_PREFIX
"%s construct "
178 "contains multiple OpenMP constructs\n",
180 dump_printf_loc (MSG_NOTE
, *ret
,
181 "The first OpenMP construct within "
183 dump_printf_loc (MSG_NOTE
, stmt
,
184 "The second OpenMP construct within "
193 if (dump_enabled_p ())
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
196 GRID_MISSED_MSG_PREFIX
"%s construct contains "
197 "a complex statement\n", name
);
198 dump_printf_loc (MSG_NOTE
, stmt
,
199 "This statement cannot be analyzed for "
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213 dumping is enabled and function fails, use NAME to dump a note with the
214 reason for failure. */
217 grid_find_single_omp_among_assignments (gimple_seq seq
, grid_prop
*grid
,
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
224 GRID_MISSED_MSG_PREFIX
"%s construct has empty body\n",
230 if (grid_find_single_omp_among_assignments_1 (seq
, grid
, name
, &ret
))
232 if (!ret
&& dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
234 GRID_MISSED_MSG_PREFIX
"%s construct does not contain"
235 " any other OpenMP construct\n", name
);
242 /* Walker function looking for statements there is no point gridifying (and for
243 noreturn function calls which we cannot do). Return non-NULL if such a
244 function is found. */
247 grid_find_ungridifiable_statement (gimple_stmt_iterator
*gsi
,
249 struct walk_stmt_info
*wi
)
251 *handled_ops_p
= false;
252 gimple
*stmt
= gsi_stmt (*gsi
);
253 switch (gimple_code (stmt
))
256 if (gimple_call_noreturn_p (as_a
<gcall
*> (stmt
)))
258 *handled_ops_p
= true;
260 return error_mark_node
;
264 /* We may reduce the following list if we find a way to implement the
265 clauses, but now there is no point trying further. */
266 case GIMPLE_OMP_CRITICAL
:
267 case GIMPLE_OMP_TASKGROUP
:
268 case GIMPLE_OMP_TASK
:
269 case GIMPLE_OMP_SECTION
:
270 case GIMPLE_OMP_SECTIONS
:
271 case GIMPLE_OMP_SECTIONS_SWITCH
:
272 case GIMPLE_OMP_TARGET
:
273 case GIMPLE_OMP_ORDERED
:
274 *handled_ops_p
= true;
276 return error_mark_node
;
283 /* Examine clauses of omp parallel statement PAR and if any prevents
284 gridification, issue a missed-optimization diagnostics and return false,
285 otherwise return true. GRID describes hitherto discovered properties of the
286 loop that is evaluated for possible gridification. */
289 grid_parallel_clauses_gridifiable (gomp_parallel
*par
, dump_user_location_t tloc
)
291 tree clauses
= gimple_omp_parallel_clauses (par
);
294 switch (OMP_CLAUSE_CODE (clauses
))
296 case OMP_CLAUSE_NUM_THREADS
:
297 if (dump_enabled_p ())
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
300 GRID_MISSED_MSG_PREFIX
"because there is "
301 "a num_threads clause of the parallel "
303 dump_printf_loc (MSG_NOTE
, par
,
304 "Parallel construct has a num_threads clause\n");
308 case OMP_CLAUSE_REDUCTION
:
309 if (dump_enabled_p ())
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
312 GRID_MISSED_MSG_PREFIX
"a reduction clause "
314 dump_printf_loc (MSG_NOTE
, par
,
315 "Parallel construct has a reduction clause\n");
322 clauses
= OMP_CLAUSE_CHAIN (clauses
);
327 /* Examine clauses and the body of omp loop statement GFOR and if something
328 prevents gridification, issue a missed-optimization diagnostics and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
333 grid_inner_loop_gridifiable_p (gomp_for
*gfor
, grid_prop
*grid
)
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor
),
338 if (dump_enabled_p ())
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
341 GRID_MISSED_MSG_PREFIX
"the inner loop "
342 "loop bounds computation contains a complex "
344 dump_printf_loc (MSG_NOTE
, gfor
,
345 "Loop construct cannot be analyzed for "
351 tree clauses
= gimple_omp_for_clauses (gfor
);
354 switch (OMP_CLAUSE_CODE (clauses
))
356 case OMP_CLAUSE_SCHEDULE
:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses
) != OMP_CLAUSE_SCHEDULE_AUTO
)
359 if (dump_enabled_p ())
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
362 GRID_MISSED_MSG_PREFIX
"the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE
, gfor
,
365 "Loop construct has a non automatic "
366 "schedule clause\n");
372 case OMP_CLAUSE_REDUCTION
:
373 if (dump_enabled_p ())
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
376 GRID_MISSED_MSG_PREFIX
"a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE
, gfor
,
379 "Loop construct has a reduction schedule "
387 clauses
= OMP_CLAUSE_CHAIN (clauses
);
389 struct walk_stmt_info wi
;
390 memset (&wi
, 0, sizeof (wi
));
391 if (walk_gimple_seq (gimple_omp_body (gfor
),
392 grid_find_ungridifiable_statement
,
395 gimple
*bad
= (gimple
*) wi
.info
;
396 if (dump_enabled_p ())
398 if (is_gimple_call (bad
))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
400 GRID_MISSED_MSG_PREFIX
"the inner loop contains "
401 "call to a noreturn function\n");
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
404 GRID_MISSED_MSG_PREFIX
"the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name
[(int) gimple_code (bad
)]);
407 dump_printf_loc (MSG_NOTE
, bad
,
408 "This statement cannot be analyzed for "
416 /* Given distribute omp construct represented by DIST, which in the original
417 source forms a compound construct with a looping construct, return true if it
418 can be turned into a gridified HSA kernel. Otherwise return false. GRID
419 describes hitherto discovered properties of the loop that is evaluated for
420 possible gridification. */
423 grid_dist_follows_simple_pattern (gomp_for
*dist
, grid_prop
*grid
)
425 dump_user_location_t tloc
= grid
->target_loc
;
426 gimple
*stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (dist
),
430 || !(par
= dyn_cast
<gomp_parallel
*> (stmt
))
431 || !grid_parallel_clauses_gridifiable (par
, tloc
))
434 stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (par
), grid
,
437 if (!stmt
|| !(gfor
= dyn_cast
<gomp_for
*> (stmt
)))
440 if (gimple_omp_for_kind (gfor
) != GF_OMP_FOR_KIND_FOR
)
442 if (dump_enabled_p ())
443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
444 GRID_MISSED_MSG_PREFIX
"the inner loop is not "
445 "a simple for loop\n");
448 gcc_assert (gimple_omp_for_collapse (gfor
) == grid
->collapse
);
450 if (!grid_inner_loop_gridifiable_p (gfor
, grid
))
456 /* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
462 grid_gfor_follows_tiling_pattern (gomp_for
*gfor
, grid_prop
*grid
)
464 if (gimple_omp_for_kind (gfor
) != GF_OMP_FOR_KIND_FOR
)
466 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
469 GRID_MISSED_MSG_PREFIX
"an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE
, gfor
,
472 "This statement is not a simple for loop\n");
477 if (!grid_inner_loop_gridifiable_p (gfor
, grid
))
480 if (gimple_omp_for_collapse (gfor
) != grid
->collapse
)
482 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
485 GRID_MISSED_MSG_PREFIX
"an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE
, gfor
,
488 "Loop construct uses a different collapse clause\n");
493 struct omp_for_data fd
;
494 struct omp_for_data_loop
*loops
495 = (struct omp_for_data_loop
*)alloca (grid
->collapse
496 * sizeof (struct omp_for_data_loop
));
497 omp_extract_for_data (gfor
, &fd
, loops
);
498 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
500 tree itype
, type
= TREE_TYPE (fd
.loops
[i
].v
);
501 if (POINTER_TYPE_P (type
))
502 itype
= signed_type_for (type
);
506 tree n1
= fold_convert (itype
, fd
.loops
[i
].n1
);
507 tree n2
= fold_convert (itype
, fd
.loops
[i
].n2
);
508 tree t
= build_int_cst (itype
,
509 (fd
.loops
[i
].cond_code
== LT_EXPR
? -1 : 1));
510 t
= fold_build2 (PLUS_EXPR
, itype
, fd
.loops
[i
].step
, t
);
511 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
512 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
513 if (TYPE_UNSIGNED (itype
) && fd
.loops
[i
].cond_code
== GT_EXPR
)
514 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
515 fold_build1 (NEGATE_EXPR
, itype
, t
),
516 fold_build1 (NEGATE_EXPR
, itype
, fd
.loops
[i
].step
));
518 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, fd
.loops
[i
].step
);
520 if (!operand_equal_p (grid
->group_sizes
[i
], t
, 0))
522 if (dump_enabled_p ())
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
525 GRID_MISSED_MSG_PREFIX
"the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE
, gfor
,
528 "Loop construct does not seem to loop over "
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
541 grid_call_permissible_in_distribute_p (tree fndecl
)
543 if (DECL_PURE_P (fndecl
) || TREE_READONLY (fndecl
))
546 const char *name
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
547 if (strstr (name
, "omp_") != name
)
550 if ((strcmp (name
, "omp_get_thread_num") == 0)
551 || (strcmp (name
, "omp_get_num_threads") == 0)
552 || (strcmp (name
, "omp_get_num_teams") == 0)
553 || (strcmp (name
, "omp_get_team_num") == 0)
554 || (strcmp (name
, "omp_get_level") == 0)
555 || (strcmp (name
, "omp_get_active_level") == 0)
556 || (strcmp (name
, "omp_in_parallel") == 0))
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
567 grid_handle_call_in_distribute (gimple_stmt_iterator
*gsi
)
569 gimple
*stmt
= gsi_stmt (*gsi
);
570 tree fndecl
= gimple_call_fndecl (stmt
);
571 gcc_checking_assert (stmt
);
572 if (DECL_PURE_P (fndecl
) || TREE_READONLY (fndecl
))
575 const char *name
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
576 if ((strcmp (name
, "omp_get_thread_num") == 0)
577 || (strcmp (name
, "omp_get_level") == 0)
578 || (strcmp (name
, "omp_get_active_level") == 0)
579 || (strcmp (name
, "omp_in_parallel") == 0))
581 tree lhs
= gimple_call_lhs (stmt
);
585 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
586 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
588 gsi_remove (gsi
, true);
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name
, "omp_get_num_threads") == 0)
595 || (strcmp (name
, "omp_get_num_teams") == 0)
596 || (strcmp (name
, "omp_get_team_num") == 0));
600 /* Given a sequence of statements within a distribute omp construct or a
601 parallel construct, which in the original source does not form a compound
602 construct with a looping construct, return true if it does not prevent us
603 from turning it into a gridified HSA kernel. Otherwise return false. GRID
604 describes hitherto discovered properties of the loop that is evaluated for
605 possible gridification. IN_PARALLEL must be true if seq is within a
606 parallel construct and flase if it is only within a distribute
610 grid_dist_follows_tiling_pattern (gimple_seq seq
, grid_prop
*grid
,
613 gimple_stmt_iterator gsi
;
614 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
616 gimple
*stmt
= gsi_stmt (gsi
);
618 if (grid_safe_assignment_p (stmt
, grid
)
619 || gimple_code (stmt
) == GIMPLE_GOTO
620 || gimple_code (stmt
) == GIMPLE_LABEL
621 || gimple_code (stmt
) == GIMPLE_COND
)
623 else if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind
),
630 else if (gtry
*try_stmt
= dyn_cast
<gtry
*> (stmt
))
632 if (gimple_try_kind (try_stmt
) == GIMPLE_TRY_CATCH
)
634 if (dump_enabled_p ())
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
637 GRID_MISSED_MSG_PREFIX
"the distribute "
638 "construct contains a try..catch region\n");
639 dump_printf_loc (MSG_NOTE
, try_stmt
,
640 "This statement cannot be analyzed for "
641 "tiled gridification\n");
645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt
),
648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt
),
653 else if (is_gimple_call (stmt
))
655 tree fndecl
= gimple_call_fndecl (stmt
);
656 if (fndecl
&& grid_call_permissible_in_distribute_p (fndecl
))
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
662 GRID_MISSED_MSG_PREFIX
"the distribute "
663 "construct contains a call\n");
664 dump_printf_loc (MSG_NOTE
, stmt
,
665 "This statement cannot be analyzed for "
666 "tiled gridification\n");
670 else if (gomp_parallel
*par
= dyn_cast
<gomp_parallel
*> (stmt
))
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
677 GRID_MISSED_MSG_PREFIX
"a parallel "
678 "construct contains another parallel "
680 dump_printf_loc (MSG_NOTE
, stmt
,
681 "This parallel construct is nested in "
686 if (!grid_parallel_clauses_gridifiable (par
, grid
->target_loc
)
687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par
),
691 else if (gomp_for
*gfor
= dyn_cast
<gomp_for
*> (stmt
))
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
698 GRID_MISSED_MSG_PREFIX
"a loop "
699 "construct is not nested within a parallel "
701 dump_printf_loc (MSG_NOTE
, stmt
,
702 "This loop construct is not nested in "
703 "a parallel construct\n");
707 if (!grid_gfor_follows_tiling_pattern (gfor
, grid
))
712 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
715 GRID_MISSED_MSG_PREFIX
"the distribute "
716 "construct contains a complex statement\n");
717 dump_printf_loc (MSG_NOTE
, stmt
,
718 "This statement cannot be analyzed for "
719 "tiled gridification\n");
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
732 grid_target_follows_gridifiable_pattern (gomp_target
*target
, grid_prop
*grid
)
734 if (gimple_omp_target_kind (target
) != GF_OMP_TARGET_KIND_REGION
)
737 dump_user_location_t tloc
= target
;
738 grid
->target_loc
= tloc
;
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target
),
744 gomp_teams
*teams
= dyn_cast
<gomp_teams
*> (stmt
);
745 tree group_size
= NULL
;
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
750 GRID_MISSED_MSG_PREFIX
"it does not have a sole "
751 "teams construct in it.\n");
755 tree clauses
= gimple_omp_teams_clauses (teams
);
758 switch (OMP_CLAUSE_CODE (clauses
))
760 case OMP_CLAUSE_NUM_TEAMS
:
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
763 GRID_MISSED_MSG_PREFIX
"the teams construct "
764 "contains a num_teams clause\n ");
767 case OMP_CLAUSE_REDUCTION
:
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
770 GRID_MISSED_MSG_PREFIX
"a reduction "
771 "clause is present\n ");
774 case OMP_CLAUSE_THREAD_LIMIT
:
775 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses
, 0)))
776 group_size
= OMP_CLAUSE_OPERAND (clauses
, 0);
782 clauses
= OMP_CLAUSE_CHAIN (clauses
);
785 stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (teams
), grid
,
789 gomp_for
*dist
= dyn_cast
<gomp_for
*> (stmt
);
792 if (dump_enabled_p ())
793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
794 GRID_MISSED_MSG_PREFIX
"the teams construct does not "
795 "have a single distribute construct in it.\n");
799 gcc_assert (gimple_omp_for_kind (dist
) == GF_OMP_FOR_KIND_DISTRIBUTE
);
801 grid
->collapse
= gimple_omp_for_collapse (dist
);
802 if (grid
->collapse
> 3)
804 if (dump_enabled_p ())
805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
806 GRID_MISSED_MSG_PREFIX
"the distribute construct "
807 "contains collapse clause with parameter greater "
812 struct omp_for_data fd
;
813 struct omp_for_data_loop
*dist_loops
814 = (struct omp_for_data_loop
*)alloca (grid
->collapse
815 * sizeof (struct omp_for_data_loop
));
816 omp_extract_for_data (dist
, &fd
, dist_loops
);
819 if (group_size
&& !operand_equal_p (group_size
, fd
.chunk_size
, 0))
821 if (dump_enabled_p ())
822 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
823 GRID_MISSED_MSG_PREFIX
"the teams "
824 "thread limit is different from distribute "
828 group_size
= fd
.chunk_size
;
830 if (group_size
&& grid
->collapse
> 1)
832 if (dump_enabled_p ())
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
834 GRID_MISSED_MSG_PREFIX
"group size cannot be "
835 "set using thread_limit or schedule clauses "
836 "when also using a collapse clause greater than 1\n");
840 if (gimple_omp_for_combined_p (dist
))
842 grid
->tiling
= false;
843 grid
->group_sizes
[0] = group_size
;
844 for (unsigned i
= 1; i
< grid
->collapse
; i
++)
845 grid
->group_sizes
[i
] = NULL
;
846 return grid_dist_follows_simple_pattern (dist
, grid
);
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
855 GRID_MISSED_MSG_PREFIX
"group size cannot be set "
856 "using thread_limit or schedule clauses when "
857 "distribute and loop constructs do not form "
858 "one combined construct\n");
861 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
863 if (fd
.loops
[i
].cond_code
== GT_EXPR
)
864 grid
->group_sizes
[i
] = fold_build1 (NEGATE_EXPR
,
865 TREE_TYPE (fd
.loops
[i
].step
),
868 grid
->group_sizes
[i
] = fd
.loops
[i
].step
;
870 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist
), grid
,
875 /* Operand walker, used to remap pre-body declarations according to a hash map
879 grid_remap_prebody_decls (tree
*tp
, int *walk_subtrees
, void *data
)
883 if (DECL_P (t
) || TYPE_P (t
))
890 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
891 hash_map
<tree
, tree
> *declmap
= (hash_map
<tree
, tree
> *) wi
->info
;
892 tree
*repl
= declmap
->get (t
);
/* Identifiers of segments into which a particular variable should be
   placed.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};
905 /* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial
906 builtin call into SEQ that will make sure the variable is always considered
910 grid_mark_variable_segment (tree var
, enum grid_var_segment segment
)
912 /* Making a non-addressable variables would require that we re-gimplify all
913 their uses. Fortunately, we do not have to do this because if they are
914 not addressable, it means they are not used in atomic or parallel
915 statements and so relaxed GPU consistency rules mean we can just keep them
917 if (!TREE_ADDRESSABLE (var
))
922 case GRID_SEGMENT_GROUP
:
923 DECL_ATTRIBUTES (var
) = tree_cons (get_identifier ("hsa_group_segment"),
924 NULL
, DECL_ATTRIBUTES (var
));
926 case GRID_SEGMENT_GLOBAL
:
927 DECL_ATTRIBUTES (var
) = tree_cons (get_identifier ("hsa_global_segment"),
928 NULL
, DECL_ATTRIBUTES (var
));
934 if (!TREE_STATIC (var
))
936 TREE_STATIC (var
) = 1;
937 const char *prefix
= IDENTIFIER_POINTER (DECL_NAME (var
));
938 SET_DECL_ASSEMBLER_NAME (var
, create_tmp_var_name (prefix
));
939 varpool_node::finalize_decl (var
);
944 /* Copy leading register-type assignments to local variables in SRC to just
945 before DST, Creating temporaries, adjusting mapping of operands in WI and
946 remapping operands as necessary. Add any new temporaries to TGT_BIND.
947 Return the first statement that does not conform to grid_safe_assignment_p
948 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
949 variables in traversed bind statements so that they are put into the
950 appropriate segment. */
953 grid_copy_leading_local_assignments (gimple_seq src
, gimple_stmt_iterator
*dst
,
955 enum grid_var_segment var_segment
,
956 struct walk_stmt_info
*wi
)
958 hash_map
<tree
, tree
> *declmap
= (hash_map
<tree
, tree
> *) wi
->info
;
959 gimple_stmt_iterator gsi
;
960 for (gsi
= gsi_start (src
); !gsi_end_p (gsi
); gsi_next (&gsi
))
962 gimple
*stmt
= gsi_stmt (gsi
);
963 if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
965 gimple
*r
= grid_copy_leading_local_assignments
966 (gimple_bind_body (bind
), dst
, tgt_bind
, var_segment
, wi
);
968 if (var_segment
!= GRID_SEGMENT_PRIVATE
)
969 for (tree var
= gimple_bind_vars (bind
);
971 var
= DECL_CHAIN (var
))
972 grid_mark_variable_segment (var
, var_segment
);
978 if (!grid_safe_assignment_p (stmt
, NULL
))
980 tree lhs
= gimple_assign_lhs (as_a
<gassign
*> (stmt
));
981 tree repl
= copy_var_decl (lhs
, create_tmp_var_name (NULL
),
983 DECL_CONTEXT (repl
) = current_function_decl
;
984 gimple_bind_append_vars (tgt_bind
, repl
);
986 declmap
->put (lhs
, repl
);
987 gassign
*copy
= as_a
<gassign
*> (gimple_copy (stmt
));
988 walk_gimple_op (copy
, grid_remap_prebody_decls
, wi
);
989 gsi_insert_before (dst
, copy
, GSI_SAME_STMT
);
994 /* Statement walker function to make adjustments to statements within the
995 gridifed kernel copy. */
998 grid_process_grid_body (gimple_stmt_iterator
*gsi
, bool *handled_ops_p
,
999 struct walk_stmt_info
*)
1001 *handled_ops_p
= false;
1002 gimple
*stmt
= gsi_stmt (*gsi
);
1003 if (gimple_code (stmt
) == GIMPLE_OMP_FOR
1004 && (gimple_omp_for_kind (stmt
) & GF_OMP_FOR_SIMD
))
1006 gomp_for
*loop
= as_a
<gomp_for
*> (stmt
);
1007 tree clauses
= gimple_omp_for_clauses (loop
);
1008 tree cl
= omp_find_clause (clauses
, OMP_CLAUSE_SAFELEN
);
1010 OMP_CLAUSE_SAFELEN_EXPR (cl
) = integer_one_node
;
1013 tree c
= build_omp_clause (UNKNOWN_LOCATION
, OMP_CLAUSE_SAFELEN
);
1014 OMP_CLAUSE_SAFELEN_EXPR (c
) = integer_one_node
;
1015 OMP_CLAUSE_CHAIN (c
) = clauses
;
1016 gimple_omp_for_set_clauses (loop
, c
);
1022 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1023 combined construct with a simd loop, eliminate the simd loop. */
1026 grid_eliminate_combined_simd_part (gomp_for
*parloop
)
1028 struct walk_stmt_info wi
;
1030 memset (&wi
, 0, sizeof (wi
));
1032 enum gf_mask msk
= GF_OMP_FOR_SIMD
;
1033 wi
.info
= (void *) &msk
;
1034 walk_gimple_seq (gimple_omp_body (parloop
), omp_find_combined_for
, NULL
, &wi
);
1035 gimple
*stmt
= (gimple
*) wi
.info
;
1036 /* We expect that the SIMD id the only statement in the parallel loop. */
1038 && gimple_code (stmt
) == GIMPLE_OMP_FOR
1039 && (gimple_omp_for_kind (stmt
) == GF_OMP_FOR_SIMD
)
1040 && gimple_omp_for_combined_into_p (stmt
)
1041 && !gimple_omp_for_combined_p (stmt
));
1042 gomp_for
*simd
= as_a
<gomp_for
*> (stmt
);
1044 /* Copy over the iteration properties because the body refers to the index in
1045 the bottmom-most loop. */
1046 unsigned i
, collapse
= gimple_omp_for_collapse (parloop
);
1047 gcc_checking_assert (collapse
== gimple_omp_for_collapse (simd
));
1048 for (i
= 0; i
< collapse
; i
++)
1050 gimple_omp_for_set_index (parloop
, i
, gimple_omp_for_index (simd
, i
));
1051 gimple_omp_for_set_initial (parloop
, i
, gimple_omp_for_initial (simd
, i
));
1052 gimple_omp_for_set_final (parloop
, i
, gimple_omp_for_final (simd
, i
));
1053 gimple_omp_for_set_incr (parloop
, i
, gimple_omp_for_incr (simd
, i
));
1056 tree
*tgt
= gimple_omp_for_clauses_ptr (parloop
);
1058 tgt
= &OMP_CLAUSE_CHAIN (*tgt
);
1060 /* Copy over all clauses, except for linear clauses, which are turned into
1061 private clauses, and all other simd-specific clauses, which are
1063 tree
*pc
= gimple_omp_for_clauses_ptr (simd
);
1067 switch (TREE_CODE (c
))
1069 case OMP_CLAUSE_LINEAR
:
1071 tree priv
= build_omp_clause (UNKNOWN_LOCATION
, OMP_CLAUSE_PRIVATE
);
1072 OMP_CLAUSE_DECL (priv
) = OMP_CLAUSE_DECL (c
);
1073 OMP_CLAUSE_CHAIN (priv
) = NULL
;
1075 tgt
= &OMP_CLAUSE_CHAIN (priv
);
1076 pc
= &OMP_CLAUSE_CHAIN (c
);
1080 case OMP_CLAUSE_SAFELEN
:
1081 case OMP_CLAUSE_SIMDLEN
:
1082 case OMP_CLAUSE_ALIGNED
:
1083 pc
= &OMP_CLAUSE_CHAIN (c
);
1087 *pc
= OMP_CLAUSE_CHAIN (c
);
1088 OMP_CLAUSE_CHAIN (c
) = NULL
;
1090 tgt
= &OMP_CLAUSE_CHAIN (c
);
1095 /* Finally, throw away the simd and mark the parallel loop as not
1097 gimple_omp_set_body (parloop
, gimple_omp_body (simd
));
1098 gimple_omp_for_set_combined_p (parloop
, false);
1101 /* Statement walker function marking all parallels as grid_phony and loops as
1102 grid ones representing threads of a particular thread group. */
1105 grid_mark_tiling_loops (gimple_stmt_iterator
*gsi
, bool *handled_ops_p
,
1106 struct walk_stmt_info
*wi_in
)
1108 *handled_ops_p
= false;
1109 if (gomp_for
*loop
= dyn_cast
<gomp_for
*> (gsi_stmt (*gsi
)))
1111 *handled_ops_p
= true;
1112 gimple_omp_for_set_kind (loop
, GF_OMP_FOR_KIND_GRID_LOOP
);
1113 gimple_omp_for_set_grid_intra_group (loop
, true);
1114 if (gimple_omp_for_combined_p (loop
))
1115 grid_eliminate_combined_simd_part (loop
);
1117 struct walk_stmt_info body_wi
;
1118 memset (&body_wi
, 0, sizeof (body_wi
));
1119 walk_gimple_seq_mod (gimple_omp_body_ptr (loop
),
1120 grid_process_grid_body
, NULL
, &body_wi
);
1122 gbind
*bind
= (gbind
*) wi_in
->info
;
1124 for (c
= gimple_omp_for_clauses (loop
); c
; c
= OMP_CLAUSE_CHAIN (c
))
1125 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LASTPRIVATE
)
1127 push_gimplify_context ();
1128 tree ov
= OMP_CLAUSE_DECL (c
);
1129 tree gv
= copy_var_decl (ov
, create_tmp_var_name (NULL
),
1132 grid_mark_variable_segment (gv
, GRID_SEGMENT_GROUP
);
1133 DECL_CONTEXT (gv
) = current_function_decl
;
1134 gimple_bind_append_vars (bind
, gv
);
1135 tree x
= lang_hooks
.decls
.omp_clause_assign_op (c
, gv
, ov
);
1136 gimplify_and_add (x
, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c
));
1137 x
= lang_hooks
.decls
.omp_clause_copy_ctor (c
, ov
, gv
);
1138 gimple_seq l
= NULL
;
1139 gimplify_and_add (x
, &l
);
1140 gsi_insert_seq_after (gsi
, l
, GSI_SAME_STMT
);
1141 pop_gimplify_context (bind
);
/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
                                      bool *handled_ops_p,
                                      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      /* All variables of a bind within the distribute body live in the
         group segment.  */
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
        grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      /* Wrap the parallel body in a fresh bind so that grid_mark_tiling_loops
         has a place to append group-segment temporaries.  */
      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
                           grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}
/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignment leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch (the
   distribute loop when tiling, otherwise the innermost gridified loop).  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
                               gimple_stmt_iterator *dst,
                               gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
                                                      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
                                              tgt_bind, GRID_SEGMENT_GLOBAL,
                                              wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                         GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      /* With tiling the distribute loop itself becomes the grid loop
         iterating over thread groups.  */
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
                           grid_mark_tiling_parallels_and_loops, NULL,
                           &wi_tiled);
      return dist;
    }
  else
    {
      /* Without tiling the distribute construct is phony and the inner
         parallel/for pair provides the dispatch loop.  */
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
                                                  tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
                                                  dst, tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
        grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                             GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
        grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
                           grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}
/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
                                   gimple_stmt_iterator *gsi,
                                   gbind *tgt_bind)
{
  /* Gridification descriptor, filled in by the pattern matcher.  */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
                     "Target construct will be turned into a gridified HSA "
                     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  /* DECLMAP maps original prebody decls to the temporaries created for the
     copies; it is threaded through the walkers via WI.  */
  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
                                                        tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  /* Splice the copied bind's block into the enclosing block tree so scoping
     and debug information remain consistent.  */
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  /* Remap any prebody decls referenced by the group sizes before computing
     the grid dimensions.  */
  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree index_var = gimple_omp_for_index (inner_loop, i);
      tree itype, type = TREE_TYPE (index_var);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      tree step
        = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
      omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      /* Standard OMP iteration-count computation:
         (n2 - n1 + step +- 1) / step, guarded to zero when the loop
         condition does not hold at all.  */
      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));

      if (grid.tiling)
        {
          /* When tiling, the grid size is expressed in elements rather than
             iterations, so scale by the (positive) step.  */
          if (cond_code == GT_EXPR)
            step = fold_build1 (NEGATE_EXPR, itype, step);
          t = fold_build2 (MULT_EXPR, itype, t, step);
        }

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
        {
          ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
          tmpseq = NULL;
          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
          if (!gimple_seq_empty_p (tmpseq))
            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
        }
      else
        /* Zero group size means the runtime picks one.  */
        ws = build_zero_cst (uint32_type_node);

      /* Record dimension I's grid size GS and workgroup size WS on the
         target via an artificial _griddim_ clause.  */
      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
}
/* Walker function doing all the work for create_target_kernels.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
                               bool *handled_ops_p,
                               struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      /* INCOMING->INFO is the innermost enclosing bind, set up when
         recursing into a gbind below.  */
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
                           grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}
/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}