]>
Commit | Line | Data |
---|---|---|
e898ce79 GB |
1 | /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute |
2 | constructs | |
3 | ||
a945c346 | 4 | Copyright (C) 2020-2024 Free Software Foundation, Inc. |
e898ce79 GB |
5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "backend.h" | |
26 | #include "target.h" | |
27 | #include "tree.h" | |
ccd56db8 | 28 | #include "langhooks.h" |
e898ce79 GB |
29 | #include "gimple.h" |
30 | #include "tree-pass.h" | |
31 | #include "cgraph.h" | |
32 | #include "fold-const.h" | |
33 | #include "gimplify.h" | |
34 | #include "gimple-iterator.h" | |
35 | #include "gimple-walk.h" | |
36 | #include "gomp-constants.h" | |
37 | #include "omp-general.h" | |
38 | #include "diagnostic-core.h" | |
39 | ||
40 | ||
41 | /* This preprocessing pass is run immediately before lower_omp. It decomposes | |
42 | OpenACC 'kernels' constructs into parts, a sequence of compute constructs. | |
43 | ||
44 | The translation is as follows: | |
45 | - The entire 'kernels' region is turned into a 'data' region with clauses | |
46 | taken from the 'kernels' region. New 'create' clauses are added for all | |
47 | variables declared at the top level in the kernels region. | |
48 | - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in | |
49 | a new compute construct. | |
50 | - 'loop' directives without an explicit 'independent' or 'seq' clause | |
51 | get an 'auto' clause added; other clauses are preserved on the loop | |
52 | or moved to the new surrounding compute construct, as applicable. | |
53 | - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped | |
54 | in new "gang-single" compute construct: 'worker'/'vector' parallelism is | |
55 | preserved, but 'num_gangs (1)' is enforced. | |
56 | - Both points above only apply at the topmost level in the region, that | |
57 | is, the transformation does not introduce new compute constructs inside | |
58 | nested statement bodies. In particular, this means that a | |
59 | gang-parallelizable loop inside an 'if' statement is made "gang-single". | |
60 | - In order to make the host wait only once for the whole region instead | |
61 | of once per device kernel launch, the new compute constructs are | |
62 | annotated 'async'. Unless the original 'kernels' construct already was | |
63 | marked 'async', the entire region ends with a 'wait' directive. If the | |
64 | original 'kernels' construct was marked 'async', the synthesized 'async' | |
65 | clauses use the original 'kernels' construct's 'async' argument | |
66 | (possibly implicit). | |
67 | */ | |
68 | ||
69 | ||
70 | /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind | |
71 | 'device_type', so we have to defer a lot of processing until we're in the | |
72 | offloading compilation. "Fortunately", GCC doesn't support the OpenACC | |
73 | 'device_type' clause yet, so we get away that. */ | |
74 | ||
75 | ||
76 | /* Helper function for decompose_kernels_region_body. If STMT contains a | |
77 | "top-level" OMP_FOR statement, returns a pointer to that statement; | |
78 | returns NULL otherwise. | |
79 | ||
80 | A "top-level" OMP_FOR statement is one that is possibly accompanied by | |
81 | small snippets of setup code. Specifically, this function accepts an | |
82 | OMP_FOR possibly wrapped in a singleton bind and a singleton try | |
83 | statement to allow for a local loop variable, but not an OMP_FOR | |
84 | statement nested in any other constructs. Alternatively, it accepts a | |
85 | non-singleton bind containing only assignments and then an OMP_FOR | |
86 | statement at the very end. The former style can be generated by the C | |
87 | frontend, the latter by the Fortran frontend. */ | |
88 | ||
89 | static gimple * | |
90 | top_level_omp_for_in_stmt (gimple *stmt) | |
91 | { | |
92 | if (gimple_code (stmt) == GIMPLE_OMP_FOR) | |
93 | return stmt; | |
94 | ||
95 | if (gimple_code (stmt) == GIMPLE_BIND) | |
96 | { | |
97 | gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt)); | |
98 | if (gimple_seq_singleton_p (body)) | |
99 | { | |
100 | /* Accept an OMP_FOR statement, or a try statement containing only | |
101 | a single OMP_FOR. */ | |
102 | gimple *maybe_for_or_try = gimple_seq_first_stmt (body); | |
103 | if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR) | |
104 | return maybe_for_or_try; | |
105 | else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY) | |
106 | { | |
107 | gimple_seq try_body = gimple_try_eval (maybe_for_or_try); | |
108 | if (!gimple_seq_singleton_p (try_body)) | |
109 | return NULL; | |
110 | gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body); | |
111 | if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR) | |
112 | return maybe_omp_for_stmt; | |
113 | } | |
114 | } | |
115 | else | |
116 | { | |
117 | gimple_stmt_iterator gsi; | |
118 | /* Accept only a block of optional assignments followed by an | |
119 | OMP_FOR at the end. No other kinds of statements allowed. */ | |
120 | for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi)) | |
121 | { | |
122 | gimple *body_stmt = gsi_stmt (gsi); | |
123 | if (gimple_code (body_stmt) == GIMPLE_ASSIGN) | |
124 | continue; | |
125 | else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR | |
126 | && gsi_one_before_end_p (gsi)) | |
127 | return body_stmt; | |
128 | else | |
129 | return NULL; | |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | return NULL; | |
135 | } | |
136 | ||
137 | /* Helper for adjust_region_code: evaluate the statement at GSI_P. */ | |
138 | ||
139 | static tree | |
140 | adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p, | |
141 | bool *handled_ops_p, | |
142 | struct walk_stmt_info *wi) | |
143 | { | |
144 | int *region_code = (int *) wi->info; | |
145 | ||
146 | gimple *stmt = gsi_stmt (*gsi_p); | |
147 | switch (gimple_code (stmt)) | |
148 | { | |
149 | case GIMPLE_OMP_FOR: | |
150 | { | |
151 | tree clauses = gimple_omp_for_clauses (stmt); | |
152 | if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT)) | |
153 | { | |
154 | /* Explicit 'independent' clause. */ | |
155 | /* Keep going; recurse into loop body. */ | |
156 | break; | |
157 | } | |
158 | else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ)) | |
159 | { | |
160 | /* Explicit 'seq' clause. */ | |
161 | /* We'll "parallelize" if at some level a loop construct has been | |
162 | marked up by the user as unparallelizable ('seq' clause; we'll | |
163 | respect that in the later processing). Given that the user has | |
164 | explicitly marked it up, this loop construct cannot be | |
165 | performance-critical, and in this case it's also fine to | |
166 | "parallelize" instead of "gang-single", because any outer or | |
167 | inner loops may still exploit the available parallelism. */ | |
168 | /* Keep going; recurse into loop body. */ | |
169 | break; | |
170 | } | |
171 | else | |
172 | { | |
173 | /* Explicit or implicit 'auto' clause. */ | |
174 | /* The user would like this loop analyzed ('auto' clause) and | |
175 | typically parallelized, but we don't have available yet the | |
176 | compiler logic to analyze this, so can't parallelize it here, so | |
177 | we'd very likely be running into a performance problem if we | |
178 | were to execute this unparallelized, thus forward the whole loop | |
179 | nest to 'parloops'. */ | |
180 | *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; | |
181 | /* Terminate: final decision for this region. */ | |
182 | *handled_ops_p = true; | |
183 | return integer_zero_node; | |
184 | } | |
185 | gcc_unreachable (); | |
186 | } | |
187 | ||
188 | case GIMPLE_COND: | |
189 | case GIMPLE_GOTO: | |
190 | case GIMPLE_SWITCH: | |
191 | case GIMPLE_ASM: | |
4dda30e9 | 192 | case GIMPLE_ASSUME: |
e898ce79 GB |
193 | case GIMPLE_TRANSACTION: |
194 | case GIMPLE_RETURN: | |
195 | /* Statement that might constitute some looping/control flow pattern. */ | |
196 | /* The user would like this code analyzed (implicit inside a 'kernels' | |
197 | region) and typically parallelized, but we don't have available yet | |
198 | the compiler logic to analyze this, so can't parallelize it here, so | |
199 | we'd very likely be running into a performance problem if we were to | |
200 | execute this unparallelized, thus forward the whole thing to | |
201 | 'parloops'. */ | |
202 | *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; | |
203 | /* Terminate: final decision for this region. */ | |
204 | *handled_ops_p = true; | |
205 | return integer_zero_node; | |
206 | ||
207 | default: | |
208 | /* Keep going. */ | |
209 | break; | |
210 | } | |
211 | ||
212 | return NULL; | |
213 | } | |
214 | ||
215 | /* Adjust the REGION_CODE for the region in GS. */ | |
216 | ||
217 | static void | |
218 | adjust_region_code (gimple_seq gs, int *region_code) | |
219 | { | |
220 | struct walk_stmt_info wi; | |
221 | memset (&wi, 0, sizeof (wi)); | |
222 | wi.info = region_code; | |
223 | walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi); | |
224 | } | |
225 | ||
226 | /* Helper function for make_loops_gang_single for walking the tree. If the | |
227 | statement indicated by GSI_P is an OpenACC for loop with a gang clause, | |
228 | issue a warning and remove the clause. */ | |
229 | ||
230 | static tree | |
231 | visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p, | |
232 | bool *handled_ops_p, | |
233 | struct walk_stmt_info *) | |
234 | { | |
235 | *handled_ops_p = false; | |
236 | ||
237 | gimple *stmt = gsi_stmt (*gsi_p); | |
238 | switch (gimple_code (stmt)) | |
239 | { | |
240 | case GIMPLE_OMP_FOR: | |
241 | /*TODO Given the current 'adjust_region_code' algorithm, this is | |
242 | actually... */ | |
da630555 | 243 | #if 0 |
e898ce79 | 244 | gcc_unreachable (); |
da630555 TS |
245 | #else |
246 | /* ..., but due to bugs (PR100400), we may actually come here. | |
247 | Reliably catch this, regardless of checking level. */ | |
248 | internal_error ("PR100400"); | |
249 | #endif | |
e898ce79 GB |
250 | |
251 | { | |
252 | tree clauses = gimple_omp_for_clauses (stmt); | |
253 | tree prev_clause = NULL; | |
254 | for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause)) | |
255 | { | |
256 | if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG) | |
257 | { | |
258 | /* It makes no sense to have a 'gang' clause in a "gang-single" | |
259 | region, so warn and remove it. */ | |
260 | warning_at (gimple_location (stmt), 0, | |
261 | "conditionally executed loop in %<kernels%> region" | |
262 | " will be executed by a single gang;" | |
263 | " ignoring %<gang%> clause"); | |
264 | if (prev_clause != NULL) | |
265 | OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause); | |
266 | else | |
267 | clauses = OMP_CLAUSE_CHAIN (clause); | |
268 | ||
269 | break; | |
270 | } | |
271 | prev_clause = clause; | |
272 | } | |
273 | gimple_omp_for_set_clauses (stmt, clauses); | |
274 | } | |
275 | /* No need to recurse into nested statements; no loop nested inside | |
276 | this loop can be gang-partitioned. */ | |
277 | sorry ("%<gang%> loop in %<gang-single%> region"); | |
278 | *handled_ops_p = true; | |
279 | break; | |
280 | ||
281 | default: | |
282 | break; | |
283 | } | |
284 | ||
285 | return NULL; | |
286 | } | |
287 | ||
288 | /* Visit all nested OpenACC loops in the sequence indicated by GS. This | |
289 | statement is expected to be inside a gang-single region. Issue a warning | |
290 | for any loops inside it that have gang clauses and remove the clauses. */ | |
291 | ||
292 | static void | |
293 | make_loops_gang_single (gimple_seq gs) | |
294 | { | |
295 | struct walk_stmt_info wi; | |
296 | memset (&wi, 0, sizeof (wi)); | |
297 | walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi); | |
298 | } | |
299 | ||
300 | /* Construct a "gang-single" compute construct at LOC containing the STMTS. | |
301 | Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an | |
302 | additional 'num_gangs (1)' clause to force "gang-single" execution. */ | |
303 | ||
304 | static gimple * | |
305 | make_region_seq (location_t loc, gimple_seq stmts, | |
306 | tree num_gangs_clause, | |
307 | tree num_workers_clause, | |
308 | tree vector_length_clause, | |
309 | tree clauses) | |
310 | { | |
311 | /* This correctly unshares the entire clause chain rooted here. */ | |
312 | clauses = unshare_expr (clauses); | |
313 | ||
314 | dump_user_location_t loc_stmts_first = gimple_seq_first (stmts); | |
315 | ||
316 | /* Figure out the region code for this region. */ | |
317 | /* Optimistic default: assume "setup code", no looping; thus not | |
318 | performance-critical. */ | |
319 | int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE; | |
320 | adjust_region_code (stmts, ®ion_code); | |
321 | ||
322 | if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE) | |
323 | { | |
324 | if (dump_enabled_p ()) | |
325 | /*TODO MSG_MISSED_OPTIMIZATION? */ | |
326 | dump_printf_loc (MSG_NOTE, loc_stmts_first, | |
327 | "beginning %<gang-single%> part" | |
328 | " in OpenACC %<kernels%> region\n"); | |
329 | ||
330 | /* Synthesize a 'num_gangs (1)' clause. */ | |
331 | tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS); | |
332 | OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node; | |
333 | OMP_CLAUSE_CHAIN (gang_single_clause) = clauses; | |
334 | clauses = gang_single_clause; | |
335 | ||
336 | /* Remove and issue warnings about gang clauses on any OpenACC | |
337 | loops nested inside this sequentially executed statement. */ | |
338 | make_loops_gang_single (stmts); | |
339 | } | |
340 | else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) | |
341 | { | |
342 | if (dump_enabled_p ()) | |
343 | dump_printf_loc (MSG_NOTE, loc_stmts_first, | |
344 | "beginning %<parloops%> part" | |
345 | " in OpenACC %<kernels%> region\n"); | |
346 | ||
347 | /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another | |
348 | 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses | |
349 | mangling that 'make_region_loop_nest' is doing. */ | |
350 | /* Re-assemble the clauses stripped off earlier. */ | |
351 | if (num_gangs_clause != NULL) | |
352 | { | |
353 | tree c = unshare_expr (num_gangs_clause); | |
354 | OMP_CLAUSE_CHAIN (c) = clauses; | |
355 | clauses = c; | |
356 | } | |
357 | if (num_workers_clause != NULL) | |
358 | { | |
359 | tree c = unshare_expr (num_workers_clause); | |
360 | OMP_CLAUSE_CHAIN (c) = clauses; | |
361 | clauses = c; | |
362 | } | |
363 | if (vector_length_clause != NULL) | |
364 | { | |
365 | tree c = unshare_expr (vector_length_clause); | |
366 | OMP_CLAUSE_CHAIN (c) = clauses; | |
367 | clauses = c; | |
368 | } | |
369 | } | |
370 | else | |
371 | gcc_unreachable (); | |
372 | ||
373 | /* Build the gang-single region. */ | |
374 | gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses); | |
375 | gimple_set_location (single_region, loc); | |
376 | gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK)); | |
377 | gimple_omp_set_body (single_region, single_body); | |
378 | ||
379 | return single_region; | |
380 | } | |
381 | ||
382 | /* Helper function for make_region_loop_nest. Adds a 'num_gangs' | |
383 | ('num_workers', 'vector_length') clause to the given CLAUSES, either the one | |
384 | from the parent compute construct (PARENT_CLAUSE) or a new one based on the | |
385 | loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector' | |
386 | clauses) with the given CLAUSE_CODE. Does nothing if neither PARENT_CLAUSE | |
387 | nor LOOP_CLAUSE exist. Returns the new clauses. */ | |
388 | ||
389 | static tree | |
390 | add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause, | |
391 | omp_clause_code clause_code, tree clauses) | |
392 | { | |
393 | if (parent_clause != NULL) | |
394 | { | |
395 | tree num_clause = unshare_expr (parent_clause); | |
396 | OMP_CLAUSE_CHAIN (num_clause) = clauses; | |
397 | clauses = num_clause; | |
398 | } | |
399 | else if (loop_clause != NULL) | |
400 | { | |
401 | /* The kernels region does not have a 'num_gangs' clause, but the loop | |
402 | itself had a 'gang (num: N)' clause. Honor it by adding a | |
403 | 'num_gangs (N)' clause on the compute construct. */ | |
404 | tree num = OMP_CLAUSE_OPERAND (loop_clause, 0); | |
405 | tree new_num_clause | |
406 | = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code); | |
407 | OMP_CLAUSE_OPERAND (new_num_clause, 0) = num; | |
408 | OMP_CLAUSE_CHAIN (new_num_clause) = clauses; | |
409 | clauses = new_num_clause; | |
410 | } | |
411 | return clauses; | |
412 | } | |
413 | ||
414 | /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector | |
415 | (length: N)' clauses in nested loops. Removes the argument, transferring it | |
416 | to the enclosing compute construct (via WI->INFO). If arguments within the | |
417 | same loop nest conflict, emits a warning. | |
418 | ||
419 | This function also decides whether to add an 'auto' clause on each of these | |
420 | nested loops. */ | |
421 | ||
422 | struct adjust_nested_loop_clauses_wi_info | |
423 | { | |
424 | tree *loop_gang_clause_ptr; | |
425 | tree *loop_worker_clause_ptr; | |
426 | tree *loop_vector_clause_ptr; | |
427 | }; | |
428 | ||
429 | static tree | |
430 | adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *, | |
431 | struct walk_stmt_info *wi) | |
432 | { | |
433 | struct adjust_nested_loop_clauses_wi_info *wi_info | |
434 | = (struct adjust_nested_loop_clauses_wi_info *) wi->info; | |
435 | gimple *stmt = gsi_stmt (*gsi_p); | |
436 | ||
437 | if (gimple_code (stmt) == GIMPLE_OMP_FOR) | |
438 | { | |
439 | bool add_auto_clause = true; | |
440 | tree loop_clauses = gimple_omp_for_clauses (stmt); | |
441 | tree loop_clause = loop_clauses; | |
442 | for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) | |
443 | { | |
444 | tree *outer_clause_ptr = NULL; | |
445 | switch (OMP_CLAUSE_CODE (loop_clause)) | |
446 | { | |
447 | case OMP_CLAUSE_GANG: | |
448 | outer_clause_ptr = wi_info->loop_gang_clause_ptr; | |
449 | break; | |
450 | case OMP_CLAUSE_WORKER: | |
451 | outer_clause_ptr = wi_info->loop_worker_clause_ptr; | |
452 | break; | |
453 | case OMP_CLAUSE_VECTOR: | |
454 | outer_clause_ptr = wi_info->loop_vector_clause_ptr; | |
455 | break; | |
456 | case OMP_CLAUSE_SEQ: | |
457 | case OMP_CLAUSE_INDEPENDENT: | |
458 | case OMP_CLAUSE_AUTO: | |
459 | add_auto_clause = false; | |
460 | default: | |
461 | break; | |
462 | } | |
463 | if (outer_clause_ptr != NULL) | |
464 | { | |
465 | if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL | |
466 | && *outer_clause_ptr == NULL) | |
467 | { | |
468 | /* Transfer the clause to the enclosing compute construct and | |
469 | remove the numerical argument from the 'loop'. */ | |
470 | *outer_clause_ptr = unshare_expr (loop_clause); | |
471 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; | |
472 | } | |
473 | else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL && | |
474 | OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL) | |
475 | { | |
476 | /* See if both of these are the same constant. If they | |
477 | aren't, emit a warning. */ | |
478 | tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0); | |
479 | tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0); | |
480 | if (!(cst_and_fits_in_hwi (old_op) && | |
481 | cst_and_fits_in_hwi (new_op) && | |
482 | int_cst_value (old_op) == int_cst_value (new_op))) | |
483 | { | |
484 | const char *clause_name | |
485 | = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; | |
486 | error_at (gimple_location (stmt), | |
487 | "cannot honor conflicting %qs clause", | |
488 | clause_name); | |
489 | inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr), | |
490 | "location of the previous clause" | |
491 | " in the same loop nest"); | |
492 | } | |
493 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; | |
494 | } | |
495 | } | |
496 | } | |
497 | if (add_auto_clause) | |
498 | { | |
499 | tree auto_clause | |
500 | = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO); | |
501 | OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; | |
502 | gimple_omp_for_set_clauses (stmt, auto_clause); | |
503 | } | |
504 | } | |
505 | ||
506 | return NULL; | |
507 | } | |
508 | ||
509 | /* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop' | |
510 | construct clauses into OpenACC 'parallel'/'loop' construct ones. */ | |
511 | ||
512 | static tree | |
513 | transform_kernels_loop_clauses (gimple *omp_for, | |
514 | tree num_gangs_clause, | |
515 | tree num_workers_clause, | |
516 | tree vector_length_clause, | |
517 | tree clauses) | |
518 | { | |
519 | /* If this loop in a kernels region does not have an explicit 'seq', | |
520 | 'independent', or 'auto' clause, we must give it an explicit 'auto' | |
521 | clause. | |
522 | We also check for 'gang (num: N)' clauses. These must not appear in | |
523 | kernels regions that have their own 'num_gangs' clause. Otherwise, they | |
524 | must be converted and put on the region; similarly for 'worker' and | |
525 | 'vector' clauses. */ | |
526 | bool add_auto_clause = true; | |
527 | tree loop_gang_clause = NULL, loop_worker_clause = NULL, | |
528 | loop_vector_clause = NULL; | |
529 | tree loop_clauses = gimple_omp_for_clauses (omp_for); | |
530 | for (tree loop_clause = loop_clauses; | |
531 | loop_clause; | |
532 | loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) | |
533 | { | |
534 | bool found_num_clause = false; | |
535 | tree *clause_ptr, clause_to_check; | |
536 | switch (OMP_CLAUSE_CODE (loop_clause)) | |
537 | { | |
538 | case OMP_CLAUSE_GANG: | |
539 | found_num_clause = true; | |
540 | clause_ptr = &loop_gang_clause; | |
541 | clause_to_check = num_gangs_clause; | |
542 | break; | |
543 | case OMP_CLAUSE_WORKER: | |
544 | found_num_clause = true; | |
545 | clause_ptr = &loop_worker_clause; | |
546 | clause_to_check = num_workers_clause; | |
547 | break; | |
548 | case OMP_CLAUSE_VECTOR: | |
549 | found_num_clause = true; | |
550 | clause_ptr = &loop_vector_clause; | |
551 | clause_to_check = vector_length_clause; | |
552 | break; | |
553 | case OMP_CLAUSE_INDEPENDENT: | |
554 | case OMP_CLAUSE_SEQ: | |
555 | case OMP_CLAUSE_AUTO: | |
556 | add_auto_clause = false; | |
557 | default: | |
558 | break; | |
559 | } | |
560 | if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL) | |
561 | { | |
562 | if (clause_to_check) | |
563 | { | |
564 | const char *clause_name | |
565 | = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; | |
566 | const char *parent_clause_name | |
567 | = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)]; | |
568 | error_at (OMP_CLAUSE_LOCATION (loop_clause), | |
569 | "argument not permitted on %qs clause" | |
570 | " in OpenACC %<kernels%> region with a %qs clause", | |
571 | clause_name, parent_clause_name); | |
572 | inform (OMP_CLAUSE_LOCATION (clause_to_check), | |
573 | "location of OpenACC %<kernels%>"); | |
574 | } | |
575 | /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the | |
576 | enclosing compute construct. */ | |
577 | *clause_ptr = unshare_expr (loop_clause); | |
578 | OMP_CLAUSE_CHAIN (*clause_ptr) = NULL; | |
579 | /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without | |
580 | argument. */ | |
581 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; | |
582 | } | |
583 | } | |
584 | if (add_auto_clause) | |
585 | { | |
586 | tree auto_clause = build_omp_clause (gimple_location (omp_for), | |
587 | OMP_CLAUSE_AUTO); | |
588 | OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; | |
589 | loop_clauses = auto_clause; | |
590 | } | |
591 | gimple_omp_for_set_clauses (omp_for, loop_clauses); | |
592 | /* We must also recurse into the loop; it might contain nested loops having | |
593 | their own 'worker (num: W)' or 'vector (length: V)' clauses. Turn these | |
594 | into 'worker'/'vector' clauses on the compute construct. */ | |
595 | struct walk_stmt_info wi; | |
596 | memset (&wi, 0, sizeof (wi)); | |
597 | struct adjust_nested_loop_clauses_wi_info wi_info; | |
598 | wi_info.loop_gang_clause_ptr = &loop_gang_clause; | |
599 | wi_info.loop_worker_clause_ptr = &loop_worker_clause; | |
600 | wi_info.loop_vector_clause_ptr = &loop_vector_clause; | |
601 | wi.info = &wi_info; | |
602 | gimple *body = gimple_omp_body (omp_for); | |
603 | walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi); | |
604 | /* Check if there were conflicting numbers of workers or vector length. */ | |
605 | if (loop_gang_clause != NULL && | |
606 | OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL) | |
607 | loop_gang_clause = NULL; | |
608 | if (loop_worker_clause != NULL && | |
609 | OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL) | |
610 | loop_worker_clause = NULL; | |
611 | if (loop_vector_clause != NULL && | |
612 | OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL) | |
613 | vector_length_clause = NULL; | |
614 | ||
615 | /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length' | |
616 | clauses, add these to this new compute construct. */ | |
617 | clauses | |
618 | = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause, | |
619 | OMP_CLAUSE_NUM_GANGS, clauses); | |
620 | clauses | |
621 | = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause, | |
622 | OMP_CLAUSE_NUM_WORKERS, clauses); | |
623 | clauses | |
624 | = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause, | |
625 | OMP_CLAUSE_VECTOR_LENGTH, clauses); | |
626 | ||
627 | return clauses; | |
628 | } | |
629 | ||
630 | /* Construct a possibly gang-parallel compute construct containing the STMT, | |
631 | which must be identical to, or a bind containing, the loop OMP_FOR. | |
632 | ||
633 | The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are | |
634 | optional clauses from the original kernels region and must not be contained | |
635 | in the other CLAUSES. The newly created compute construct is annotated with | |
636 | the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there is no | |
637 | NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is | |
638 | converted to a 'num_gangs (N)' clause on the new compute construct, and | |
639 | similarly for 'worker' and 'vector' clauses. | |
640 | ||
641 | The outermost loop gets an 'auto' clause unless there already is an | |
642 | 'seq'/'independent'/'auto' clause. Nested loops inside OMP_FOR are treated | |
643 | similarly by the adjust_nested_loop_clauses function. */ | |
644 | ||
645 | static gimple * | |
646 | make_region_loop_nest (gimple *omp_for, gimple_seq stmts, | |
647 | tree num_gangs_clause, | |
648 | tree num_workers_clause, | |
649 | tree vector_length_clause, | |
650 | tree clauses) | |
651 | { | |
652 | /* This correctly unshares the entire clause chain rooted here. */ | |
653 | clauses = unshare_expr (clauses); | |
654 | ||
655 | /* Figure out the region code for this region. */ | |
656 | /* Optimistic default: assume that the loop nest is parallelizable | |
657 | (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause, | |
658 | and no un-annotated loops). */ | |
659 | int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED; | |
660 | adjust_region_code (stmts, ®ion_code); | |
661 | ||
662 | if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED) | |
663 | { | |
664 | if (dump_enabled_p ()) | |
665 | /* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the | |
666 | user asked us to. */ | |
667 | dump_printf_loc (MSG_NOTE, omp_for, | |
668 | "parallelized loop nest" | |
669 | " in OpenACC %<kernels%> region\n"); | |
670 | ||
671 | clauses = transform_kernels_loop_clauses (omp_for, | |
672 | num_gangs_clause, | |
673 | num_workers_clause, | |
674 | vector_length_clause, | |
675 | clauses); | |
676 | } | |
677 | else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) | |
678 | { | |
679 | if (dump_enabled_p ()) | |
680 | dump_printf_loc (MSG_NOTE, omp_for, | |
681 | "forwarded loop nest" | |
682 | " in OpenACC %<kernels%> region" | |
683 | " to %<parloops%> for analysis\n"); | |
684 | ||
685 | /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another | |
686 | 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to | |
687 | 'transform_kernels_loop_clauses'. */ | |
688 | /* Re-assemble the clauses stripped off earlier. */ | |
689 | clauses | |
690 | = add_parent_or_loop_num_clause (num_gangs_clause, NULL, | |
691 | OMP_CLAUSE_NUM_GANGS, clauses); | |
692 | clauses | |
693 | = add_parent_or_loop_num_clause (num_workers_clause, NULL, | |
694 | OMP_CLAUSE_NUM_WORKERS, clauses); | |
695 | clauses | |
696 | = add_parent_or_loop_num_clause (vector_length_clause, NULL, | |
697 | OMP_CLAUSE_VECTOR_LENGTH, clauses); | |
698 | } | |
699 | else | |
700 | gcc_unreachable (); | |
701 | ||
702 | gimple *parallel_body_bind | |
703 | = gimple_build_bind (NULL, stmts, make_node (BLOCK)); | |
704 | gimple *parallel_region | |
705 | = gimple_build_omp_target (parallel_body_bind, region_code, clauses); | |
706 | gimple_set_location (parallel_region, gimple_location (omp_for)); | |
707 | ||
708 | return parallel_region; | |
709 | } | |
710 | ||
711 | /* Eliminate any binds directly inside BIND by adding their statements to | |
712 | BIND (i.e., modifying it in place), excluding binds that hold only an | |
713 | OMP_FOR loop and associated setup/cleanup code. Recurse into binds but | |
714 | not other statements. Return a chain of the local variables of eliminated | |
715 | binds, i.e., the local variables found in nested binds. If | |
716 | INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging | |
717 | to BIND itself. */ | |
718 | ||
719 | static tree | |
720 | flatten_binds (gbind *bind, bool include_toplevel_vars = false) | |
721 | { | |
722 | tree vars = NULL, last_var = NULL; | |
723 | ||
724 | if (include_toplevel_vars) | |
725 | { | |
726 | vars = gimple_bind_vars (bind); | |
727 | last_var = vars; | |
728 | } | |
729 | ||
730 | gimple_seq new_body = NULL; | |
731 | gimple_seq body_sequence = gimple_bind_body (bind); | |
732 | gimple_stmt_iterator gsi, gsi_n; | |
733 | for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n) | |
734 | { | |
735 | /* Advance the iterator here because otherwise it would be invalidated | |
736 | by moving statements below. */ | |
737 | gsi_n = gsi; | |
738 | gsi_next (&gsi_n); | |
739 | ||
740 | gimple *stmt = gsi_stmt (gsi); | |
741 | /* Flatten bind statements, except the ones that contain only an | |
742 | OpenACC for loop. */ | |
743 | if (gimple_code (stmt) == GIMPLE_BIND | |
744 | && !top_level_omp_for_in_stmt (stmt)) | |
745 | { | |
746 | gbind *inner_bind = as_a <gbind *> (stmt); | |
747 | /* Flatten recursively, and collect all variables. */ | |
748 | tree inner_vars = flatten_binds (inner_bind, true); | |
749 | gimple_seq inner_sequence = gimple_bind_body (inner_bind); | |
4b5726fd TS |
750 | if (flag_checking) |
751 | { | |
752 | for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence); | |
753 | !gsi_end_p (inner_gsi); | |
754 | gsi_next (&inner_gsi)) | |
755 | { | |
756 | gimple *inner_stmt = gsi_stmt (inner_gsi); | |
757 | gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND | |
758 | || top_level_omp_for_in_stmt (inner_stmt)); | |
759 | } | |
760 | } | |
e898ce79 GB |
761 | gimple_seq_add_seq (&new_body, inner_sequence); |
762 | /* Find the last variable; we will append others to it. */ | |
763 | while (last_var != NULL && TREE_CHAIN (last_var) != NULL) | |
764 | last_var = TREE_CHAIN (last_var); | |
765 | if (last_var != NULL) | |
766 | { | |
767 | TREE_CHAIN (last_var) = inner_vars; | |
768 | last_var = inner_vars; | |
769 | } | |
770 | else | |
771 | { | |
772 | vars = inner_vars; | |
773 | last_var = vars; | |
774 | } | |
775 | } | |
776 | else | |
777 | gimple_seq_add_stmt (&new_body, stmt); | |
778 | } | |
779 | ||
780 | /* Put the possibly transformed body back into the bind. */ | |
781 | gimple_bind_set_body (bind, new_body); | |
782 | return vars; | |
783 | } | |
784 | ||
785 | /* Helper function for places where we construct data regions. Wraps the BODY | |
786 | inside a try-finally construct at LOC that calls __builtin_GOACC_data_end | |
787 | in its cleanup block. Returns this try statement. */ | |
788 | ||
789 | static gimple * | |
790 | make_data_region_try_statement (location_t loc, gimple *body) | |
791 | { | |
792 | tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END); | |
793 | gimple *call = gimple_build_call (data_end_fn, 0); | |
794 | gimple_seq cleanup = NULL; | |
795 | gimple_seq_add_stmt (&cleanup, call); | |
796 | gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY); | |
797 | gimple_set_location (body, loc); | |
798 | return try_stmt; | |
799 | } | |
800 | ||
801 | /* If INNER_BIND_VARS holds variables, build an OpenACC data region with | |
802 | location LOC containing BODY and having 'create (var)' clauses for each | |
9b32c166 TS |
803 | variable (as a side effect, such variables also get TREE_ADDRESSABLE set). |
804 | If INNER_CLEANUP is present, add a try-finally statement with | |
e898ce79 GB |
805 | this cleanup code in the finally block. Return the new data region, or |
806 | the original BODY if no data region was needed. */ | |
807 | ||
808 | static gimple * | |
809 | maybe_build_inner_data_region (location_t loc, gimple *body, | |
810 | tree inner_bind_vars, gimple *inner_cleanup) | |
811 | { | |
ccd56db8 KCY |
812 | /* Is this an instantiation of a template? (In this case, we don't care what |
813 | the generic decl is - just whether the function decl has one.) */ | |
814 | bool generic_inst_p | |
815 | = (lang_hooks.decls.get_generic_function_decl (current_function_decl) | |
816 | != NULL); | |
817 | ||
e898ce79 GB |
818 | /* Build data 'create (var)' clauses for these local variables. |
819 | Below we will add these to a data region enclosing the entire body | |
820 | of the decomposed kernels region. */ | |
821 | tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL, | |
822 | inner_data_clauses = NULL; | |
823 | for (tree v = inner_bind_vars; v; v = next) | |
824 | { | |
825 | next = TREE_CHAIN (v); | |
826 | if (DECL_ARTIFICIAL (v) | |
827 | || TREE_CODE (v) == CONST_DECL | |
ccd56db8 | 828 | || generic_inst_p) |
e898ce79 GB |
829 | { |
830 | /* If this is an artificial temporary, it need not be mapped. We | |
831 | move its declaration into the bind inside the data region. | |
832 | Also avoid mapping variables if we are inside a template | |
833 | instantiation; the code does not contain all the copies to | |
834 | temporaries that would make this legal. */ | |
835 | TREE_CHAIN (v) = artificial_vars; | |
836 | artificial_vars = v; | |
837 | if (prev_mapped_var != NULL) | |
838 | TREE_CHAIN (prev_mapped_var) = next; | |
839 | else | |
840 | inner_bind_vars = next; | |
841 | } | |
842 | else | |
843 | { | |
844 | /* Otherwise, build the map clause. */ | |
845 | tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); | |
846 | OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC); | |
847 | OMP_CLAUSE_DECL (new_clause) = v; | |
848 | OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v); | |
849 | OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses; | |
850 | inner_data_clauses = new_clause; | |
851 | ||
852 | prev_mapped_var = v; | |
9b32c166 TS |
853 | |
854 | /* See <https://gcc.gnu.org/PR100280>. */ | |
e5ae22c5 TS |
855 | if (!TREE_ADDRESSABLE (v)) |
856 | { | |
de6e81ea TS |
857 | /* Request that OMP lowering make 'v' addressable. */ |
858 | OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; | |
e5ae22c5 TS |
859 | |
860 | if (dump_enabled_p ()) | |
861 | { | |
862 | const dump_user_location_t d_u_loc | |
863 | = dump_user_location_t::from_location_t (loc); | |
864 | /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */ | |
865 | #if __GNUC__ >= 10 | |
866 | # pragma GCC diagnostic push | |
867 | # pragma GCC diagnostic ignored "-Wformat" | |
868 | #endif | |
869 | dump_printf_loc (MSG_NOTE, d_u_loc, | |
870 | "OpenACC %<kernels%> decomposition:" | |
871 | " variable %<%T%> declared in block" | |
de6e81ea | 872 | " requested to be made addressable\n", |
e5ae22c5 TS |
873 | v); |
874 | #if __GNUC__ >= 10 | |
875 | # pragma GCC diagnostic pop | |
876 | #endif | |
877 | } | |
878 | } | |
e898ce79 GB |
879 | } |
880 | } | |
881 | ||
882 | if (artificial_vars) | |
883 | body = gimple_build_bind (artificial_vars, body, make_node (BLOCK)); | |
884 | ||
885 | /* If we determined above that there are variables that need to be created | |
886 | on the device, construct a data region for them and wrap the body | |
887 | inside that. */ | |
888 | if (inner_data_clauses != NULL) | |
889 | { | |
890 | gcc_assert (inner_bind_vars != NULL); | |
891 | gimple *inner_data_region | |
892 | = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, | |
893 | inner_data_clauses); | |
894 | gimple_set_location (inner_data_region, loc); | |
895 | /* Make sure __builtin_GOACC_data_end is called at the end. */ | |
896 | gimple *try_stmt = make_data_region_try_statement (loc, body); | |
897 | gimple_omp_set_body (inner_data_region, try_stmt); | |
898 | gimple *bind_body; | |
899 | if (inner_cleanup != NULL) | |
900 | /* Clobber all the inner variables that need to be clobbered. */ | |
901 | bind_body = gimple_build_try (inner_data_region, inner_cleanup, | |
902 | GIMPLE_TRY_FINALLY); | |
903 | else | |
904 | bind_body = inner_data_region; | |
905 | body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK)); | |
906 | } | |
907 | ||
908 | return body; | |
909 | } | |
910 | ||
e52253bc JB |
911 | static void |
912 | add_wait (location_t loc, gimple_seq *region_body) | |
913 | { | |
914 | /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */ | |
915 | tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); | |
916 | tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); | |
917 | gimple *wait_call = gimple_build_call (wait_fn, 2, | |
918 | sync_arg, integer_zero_node); | |
919 | gimple_set_location (wait_call, loc); | |
920 | gimple_seq_add_stmt (region_body, wait_call); | |
921 | } | |
922 | ||
e898ce79 GB |
923 | /* Helper function of decompose_kernels_region_body. The statements in |
924 | REGION_BODY are expected to be decomposed parts; add an 'async' clause to | |
925 | each. Also add a 'wait' directive at the end of the sequence. */ | |
926 | ||
927 | static void | |
928 | add_async_clauses_and_wait (location_t loc, gimple_seq *region_body) | |
929 | { | |
930 | tree default_async_queue | |
931 | = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL); | |
932 | for (gimple_stmt_iterator gsi = gsi_start (*region_body); | |
933 | !gsi_end_p (gsi); | |
934 | gsi_next (&gsi)) | |
935 | { | |
936 | gimple *stmt = gsi_stmt (gsi); | |
937 | tree target_clauses = gimple_omp_target_clauses (stmt); | |
938 | tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC); | |
939 | OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue; | |
940 | OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses; | |
941 | target_clauses = new_async_clause; | |
942 | gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt), | |
943 | target_clauses); | |
944 | } | |
e52253bc | 945 | add_wait (loc, region_body); |
e898ce79 GB |
946 | } |
947 | ||
948 | /* Auxiliary analysis of the body of a kernels region, to determine for each | |
949 | OpenACC loop whether it is control-dependent (i.e., not necessarily | |
950 | executed every time the kernels region is entered) or not. | |
951 | We say that a loop is control-dependent if there is some cond, switch, or | |
952 | goto statement that jumps over it, forwards or backwards. For example, | |
953 | if the loop is controlled by an if statement, then a jump to the true | |
954 | block, the false block, or from one of those blocks to the control flow | |
955 | join point will necessarily jump over the loop. | |
956 | This analysis implements an ad-hoc union-find data structure classifying | |
957 | statements into "control-flow regions" as follows: Most statements are in | |
958 | the same region as their predecessor, except that each OpenACC loop is in | |
959 | a region of its own, and each OpenACC loop's successor starts a new | |
960 | region. We then unite the regions of any statements linked by jumps, | |
961 | placing any cond, switch, or goto statement in the same region as its | |
962 | target label(s). | |
963 | In the end, control dependence of OpenACC loops can be determined by | |
964 | comparing their immediate predecessor and successor statements' regions. | |
965 | A jump crosses the loop if and only if the predecessor and successor are | |
966 | in the same region. (If there is no predecessor or successor, the loop | |
967 | is executed unconditionally.) | |
968 | The methods in this class identify statements by their index in the | |
969 | kernels region's body. */ | |
970 | ||
971 | class control_flow_regions | |
972 | { | |
973 | public: | |
974 | /* Initialize an instance and pre-compute the control-flow region | |
975 | information for the statement sequence SEQ. */ | |
976 | control_flow_regions (gimple_seq seq); | |
977 | ||
978 | /* Return true if the statement with the given index IDX in the analyzed | |
979 | statement sequence is an unconditionally executed OpenACC loop. */ | |
980 | bool is_unconditional_oacc_for_loop (size_t idx); | |
981 | ||
982 | private: | |
983 | /* Find the region representative for the statement identified by index | |
984 | STMT_IDX. */ | |
985 | size_t find_rep (size_t stmt_idx); | |
986 | ||
987 | /* Union the regions containing the statements represented by | |
988 | representatives A and B. */ | |
989 | void union_reps (size_t a, size_t b); | |
990 | ||
991 | /* Helper for the constructor. Performs the actual computation of the | |
992 | control-flow regions in the statement sequence SEQ. */ | |
993 | void compute_regions (gimple_seq seq); | |
994 | ||
995 | /* The mapping from statement indices to region representatives. */ | |
996 | vec <size_t> representatives; | |
997 | ||
998 | /* A cache mapping statement indices to a flag indicating whether the | |
999 | statement is a top level OpenACC for loop. */ | |
1000 | vec <bool> omp_for_loops; | |
1001 | }; | |
1002 | ||
1003 | control_flow_regions::control_flow_regions (gimple_seq seq) | |
1004 | { | |
1005 | representatives.create (1); | |
1006 | omp_for_loops.create (1); | |
1007 | compute_regions (seq); | |
1008 | } | |
1009 | ||
1010 | bool | |
1011 | control_flow_regions::is_unconditional_oacc_for_loop (size_t idx) | |
1012 | { | |
1013 | if (idx == 0 || idx == representatives.length () - 1) | |
1014 | /* The first or last statement in the kernels region. This means that | |
1015 | there is no room before or after it for a jump or a label. Thus | |
1016 | there cannot be a jump across it, so it is unconditional. */ | |
1017 | return true; | |
1018 | /* Otherwise, the loop is unconditional if the statements before and after | |
1019 | it are in different control flow regions. Scan forward and backward, | |
1020 | skipping over neighboring OpenACC for loops, to find these preceding | |
1021 | statements. */ | |
1022 | size_t prev_index = idx - 1; | |
1023 | while (prev_index > 0 && omp_for_loops [prev_index] == true) | |
1024 | prev_index--; | |
1025 | /* If all preceding statements are also OpenACC loops, all of these are | |
1026 | unconditional. */ | |
1027 | if (prev_index == 0) | |
1028 | return true; | |
1029 | size_t succ_index = idx + 1; | |
1030 | while (succ_index < omp_for_loops.length () | |
1031 | && omp_for_loops [succ_index] == true) | |
1032 | succ_index++; | |
1033 | /* If all following statements are also OpenACC loops, all of these are | |
1034 | unconditional. */ | |
1035 | if (succ_index == omp_for_loops.length ()) | |
1036 | return true; | |
1037 | return (find_rep (prev_index) != find_rep (succ_index)); | |
1038 | } | |
1039 | ||
1040 | size_t | |
1041 | control_flow_regions::find_rep (size_t stmt_idx) | |
1042 | { | |
1043 | size_t rep = stmt_idx, aux = stmt_idx; | |
1044 | /* Find the root representative of this statement. */ | |
1045 | while (representatives[rep] != rep) | |
1046 | rep = representatives[rep]; | |
1047 | /* Compress the path from the original statement to the representative. */ | |
1048 | while (representatives[aux] != rep) | |
1049 | { | |
1050 | size_t tmp = representatives[aux]; | |
1051 | representatives[aux] = rep; | |
1052 | aux = tmp; | |
1053 | } | |
1054 | return rep; | |
1055 | } | |
1056 | ||
1057 | void | |
1058 | control_flow_regions::union_reps (size_t a, size_t b) | |
1059 | { | |
1060 | a = find_rep (a); | |
1061 | b = find_rep (b); | |
1062 | representatives[b] = a; | |
1063 | } | |
1064 | ||
1065 | void | |
1066 | control_flow_regions::compute_regions (gimple_seq seq) | |
1067 | { | |
1068 | hash_map <gimple *, size_t> control_flow_reps; | |
1069 | hash_map <tree, size_t> label_reps; | |
1070 | size_t current_region = 0, idx = 0; | |
1071 | ||
1072 | /* In a first pass, assign an initial region to each statement. Except in | |
1073 | the case of OpenACC loops, each statement simply gets the same region | |
1074 | representative as its predecessor. */ | |
1075 | for (gimple_stmt_iterator gsi = gsi_start (seq); | |
1076 | !gsi_end_p (gsi); | |
1077 | gsi_next (&gsi)) | |
1078 | { | |
1079 | gimple *stmt = gsi_stmt (gsi); | |
1080 | gimple *omp_for = top_level_omp_for_in_stmt (stmt); | |
1081 | omp_for_loops.safe_push (omp_for != NULL); | |
1082 | if (omp_for != NULL) | |
1083 | { | |
1084 | /* Assign a new region to this loop and to its successor. */ | |
1085 | current_region = idx; | |
1086 | representatives.safe_push (current_region); | |
1087 | current_region++; | |
1088 | } | |
1089 | else | |
1090 | { | |
1091 | representatives.safe_push (current_region); | |
1092 | /* Remember any jumps and labels for the second pass below. */ | |
1093 | if (gimple_code (stmt) == GIMPLE_COND | |
1094 | || gimple_code (stmt) == GIMPLE_SWITCH | |
1095 | || gimple_code (stmt) == GIMPLE_GOTO) | |
1096 | control_flow_reps.put (stmt, current_region); | |
1097 | else if (gimple_code (stmt) == GIMPLE_LABEL) | |
1098 | label_reps.put (gimple_label_label (as_a <glabel *> (stmt)), | |
1099 | current_region); | |
1100 | } | |
1101 | idx++; | |
1102 | } | |
1103 | gcc_assert (representatives.length () == omp_for_loops.length ()); | |
1104 | ||
1105 | /* Revisit all the control flow statements and union the region of each | |
1106 | cond, switch, or goto statement with the target labels' regions. */ | |
1107 | for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin (); | |
1108 | it != control_flow_reps.end (); | |
1109 | ++it) | |
1110 | { | |
1111 | gimple *stmt = (*it).first; | |
1112 | size_t stmt_rep = (*it).second; | |
1113 | switch (gimple_code (stmt)) | |
1114 | { | |
1115 | tree label; | |
1116 | unsigned int n; | |
1117 | ||
1118 | case GIMPLE_COND: | |
1119 | label = gimple_cond_true_label (as_a <gcond *> (stmt)); | |
1120 | union_reps (stmt_rep, *label_reps.get (label)); | |
1121 | label = gimple_cond_false_label (as_a <gcond *> (stmt)); | |
1122 | union_reps (stmt_rep, *label_reps.get (label)); | |
1123 | break; | |
1124 | ||
1125 | case GIMPLE_SWITCH: | |
1126 | n = gimple_switch_num_labels (as_a <gswitch *> (stmt)); | |
1127 | for (unsigned int i = 0; i < n; i++) | |
1128 | { | |
1129 | tree switch_case | |
1130 | = gimple_switch_label (as_a <gswitch *> (stmt), i); | |
1131 | label = CASE_LABEL (switch_case); | |
1132 | union_reps (stmt_rep, *label_reps.get (label)); | |
1133 | } | |
1134 | break; | |
1135 | ||
1136 | case GIMPLE_GOTO: | |
1137 | label = gimple_goto_dest (stmt); | |
1138 | union_reps (stmt_rep, *label_reps.get (label)); | |
1139 | break; | |
1140 | ||
1141 | default: | |
1142 | gcc_unreachable (); | |
1143 | } | |
1144 | } | |
1145 | } | |
1146 | ||
1147 | /* Decompose the body of the KERNELS_REGION, which was originally annotated | |
1148 | with the KERNELS_CLAUSES, into a series of compute constructs. */ | |
1149 | ||
1150 | static gimple * | |
1151 | decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses) | |
1152 | { | |
1153 | location_t loc = gimple_location (kernels_region); | |
1154 | ||
1155 | /* The kernels clauses will be propagated to the child clauses unmodified, | |
1156 | except that the 'num_gangs', 'num_workers', and 'vector_length' clauses | |
1157 | will only be added to loop regions. The other regions are "gang-single" | |
1158 | and get an explicit 'num_gangs (1)' clause. So separate out the | |
1159 | 'num_gangs', 'num_workers', and 'vector_length' clauses here. | |
1160 | Also check for the presence of an 'async' clause but do not remove it from | |
1161 | the 'kernels' clauses. */ | |
1162 | tree num_gangs_clause = NULL, num_workers_clause = NULL, | |
1163 | vector_length_clause = NULL; | |
1164 | tree async_clause = NULL; | |
1165 | tree prev_clause = NULL, next_clause = NULL; | |
1166 | tree parallel_clauses = kernels_clauses; | |
1167 | for (tree c = parallel_clauses; c; c = next_clause) | |
1168 | { | |
1169 | /* Preserve this here, as we might NULL it later. */ | |
1170 | next_clause = OMP_CLAUSE_CHAIN (c); | |
1171 | ||
1172 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS | |
1173 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS | |
1174 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH) | |
1175 | { | |
1176 | /* Cut this clause out of the chain. */ | |
1177 | if (prev_clause != NULL) | |
1178 | OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c); | |
1179 | else | |
1180 | kernels_clauses = OMP_CLAUSE_CHAIN (c); | |
1181 | OMP_CLAUSE_CHAIN (c) = NULL; | |
1182 | switch (OMP_CLAUSE_CODE (c)) | |
1183 | { | |
1184 | case OMP_CLAUSE_NUM_GANGS: | |
1185 | num_gangs_clause = c; | |
1186 | break; | |
1187 | case OMP_CLAUSE_NUM_WORKERS: | |
1188 | num_workers_clause = c; | |
1189 | break; | |
1190 | case OMP_CLAUSE_VECTOR_LENGTH: | |
1191 | vector_length_clause = c; | |
1192 | break; | |
1193 | default: | |
1194 | gcc_unreachable (); | |
1195 | } | |
1196 | } | |
1197 | else | |
1198 | prev_clause = c; | |
1199 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC) | |
1200 | async_clause = c; | |
1201 | } | |
1202 | ||
1203 | gimple *kernels_body = gimple_omp_body (kernels_region); | |
1204 | gbind *kernels_bind = as_a <gbind *> (kernels_body); | |
1205 | ||
1206 | /* The body of the region may contain other nested binds declaring inner | |
1207 | local variables. Collapse all these binds into one to ensure that we | |
1208 | have a single sequence of statements to iterate over; also, collect all | |
1209 | inner variables. */ | |
1210 | tree inner_bind_vars = flatten_binds (kernels_bind); | |
1211 | gimple_seq body_sequence = gimple_bind_body (kernels_bind); | |
1212 | ||
1213 | /* All these inner variables will get allocated on the device (below, by | |
1214 | calling maybe_build_inner_data_region). Here we create 'present' | |
1215 | clauses for them and add these clauses to the list of clauses to be | |
1216 | attached to each inner compute construct. */ | |
1217 | tree present_clauses = kernels_clauses; | |
1218 | for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var)) | |
1219 | { | |
1220 | if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL) | |
1221 | { | |
1222 | tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); | |
1223 | OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT); | |
1224 | OMP_CLAUSE_DECL (present_clause) = var; | |
1225 | OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var); | |
1226 | OMP_CLAUSE_CHAIN (present_clause) = present_clauses; | |
1227 | present_clauses = present_clause; | |
1228 | } | |
1229 | } | |
1230 | kernels_clauses = present_clauses; | |
1231 | ||
1232 | /* In addition to nested binds, the "real" body of the region may be | |
1233 | nested inside a try-finally block. Find its cleanup block, which | |
1234 | contains code to clobber the local variables that must be clobbered. */ | |
1235 | gimple *inner_cleanup = NULL; | |
1236 | if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY) | |
1237 | { | |
1238 | if (gimple_seq_singleton_p (body_sequence)) | |
1239 | { | |
1240 | /* The try statement is the only thing inside the bind. */ | |
1241 | inner_cleanup = gimple_try_cleanup (body_sequence); | |
1242 | body_sequence = gimple_try_eval (body_sequence); | |
1243 | } | |
1244 | else | |
1245 | { | |
1246 | /* The bind's body starts with a try statement, but it is followed | |
1247 | by other things. */ | |
1248 | gimple_stmt_iterator gsi = gsi_start (body_sequence); | |
1249 | gimple *try_stmt = gsi_stmt (gsi); | |
1250 | inner_cleanup = gimple_try_cleanup (try_stmt); | |
1251 | gimple *try_body = gimple_try_eval (try_stmt); | |
1252 | ||
1253 | gsi_remove (&gsi, false); | |
1254 | /* Now gsi indicates the sequence of statements after the try | |
1255 | statement in the bind. Append the statement in the try body and | |
1256 | the trailing statements from gsi. */ | |
1257 | gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING); | |
1258 | body_sequence = gsi_stmt (gsi); | |
1259 | } | |
1260 | } | |
1261 | ||
1262 | /* This sequence will collect all the top-level statements in the body of | |
1263 | the data region we are about to construct. */ | |
1264 | gimple_seq region_body = NULL; | |
1265 | /* This sequence will collect consecutive statements to be put into a | |
1266 | gang-single region. */ | |
1267 | gimple_seq gang_single_seq = NULL; | |
1268 | /* Flag recording whether the gang_single_seq only contains copies to | |
1269 | local variables. These may be loop setup code that should not be | |
1270 | separated from the loop. */ | |
1271 | bool only_simple_assignments = true; | |
1272 | ||
1273 | /* Precompute the control flow region information to determine whether an | |
1274 | OpenACC loop is executed conditionally or unconditionally. */ | |
1275 | control_flow_regions cf_regions (body_sequence); | |
1276 | ||
1277 | /* Iterate over the statements in the kernels region's body. */ | |
1278 | size_t idx = 0; | |
1279 | gimple_stmt_iterator gsi, gsi_n; | |
1280 | for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++) | |
1281 | { | |
1282 | /* Advance the iterator here because otherwise it would be invalidated | |
1283 | by moving statements below. */ | |
1284 | gsi_n = gsi; | |
1285 | gsi_next (&gsi_n); | |
1286 | ||
1287 | gimple *stmt = gsi_stmt (gsi); | |
c14ea6a7 TS |
1288 | if (gimple_code (stmt) == GIMPLE_DEBUG) |
1289 | { | |
1290 | if (flag_compare_debug_opt || flag_compare_debug) | |
1291 | /* Let the usual '-fcompare-debug' analysis bail out, as | |
1292 | necessary. */ | |
1293 | ; | |
1294 | else | |
1295 | sorry_at (loc, "%qs not yet supported", | |
1296 | gimple_code_name[gimple_code (stmt)]); | |
1297 | } | |
e898ce79 GB |
1298 | gimple *omp_for = top_level_omp_for_in_stmt (stmt); |
1299 | bool is_unconditional_oacc_for_loop = false; | |
1300 | if (omp_for != NULL) | |
1301 | is_unconditional_oacc_for_loop | |
1302 | = cf_regions.is_unconditional_oacc_for_loop (idx); | |
1303 | if (omp_for != NULL | |
1304 | && is_unconditional_oacc_for_loop) | |
1305 | { | |
1306 | /* This is an OMP for statement, put it into a separate region. | |
1307 | But first, construct a gang-single region containing any | |
1308 | complex sequential statements we may have seen. */ | |
1309 | if (gang_single_seq != NULL && !only_simple_assignments) | |
1310 | { | |
1311 | gimple *single_region | |
1312 | = make_region_seq (loc, gang_single_seq, | |
1313 | num_gangs_clause, | |
1314 | num_workers_clause, | |
1315 | vector_length_clause, | |
1316 | kernels_clauses); | |
1317 | gimple_seq_add_stmt (®ion_body, single_region); | |
1318 | } | |
1319 | else if (gang_single_seq != NULL && only_simple_assignments) | |
1320 | { | |
1321 | /* There is a sequence of sequential statements preceding this | |
1322 | loop, but they are all simple assignments. This is | |
1323 | probably setup code for the loop; in particular, Fortran DO | |
1324 | loops are preceded by code to copy the loop limit variable | |
1325 | to a temporary. Group this code together with the loop | |
1326 | itself. */ | |
1327 | gimple_seq_add_stmt (&gang_single_seq, stmt); | |
1328 | stmt = gimple_build_bind (NULL, gang_single_seq, | |
1329 | make_node (BLOCK)); | |
1330 | } | |
1331 | gang_single_seq = NULL; | |
1332 | only_simple_assignments = true; | |
1333 | ||
1334 | gimple_seq parallel_seq = NULL; | |
1335 | gimple_seq_add_stmt (¶llel_seq, stmt); | |
1336 | gimple *parallel_region | |
1337 | = make_region_loop_nest (omp_for, parallel_seq, | |
1338 | num_gangs_clause, | |
1339 | num_workers_clause, | |
1340 | vector_length_clause, | |
1341 | kernels_clauses); | |
1342 | gimple_seq_add_stmt (®ion_body, parallel_region); | |
1343 | } | |
1344 | else | |
1345 | { | |
1346 | if (omp_for != NULL) | |
1347 | { | |
1348 | gcc_checking_assert (!is_unconditional_oacc_for_loop); | |
1349 | if (dump_enabled_p ()) | |
1350 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for, | |
1351 | "unparallelized loop nest" | |
1352 | " in OpenACC %<kernels%> region:" | |
1353 | " it's executed conditionally\n"); | |
1354 | } | |
1355 | ||
1356 | /* This is not an unconditional OMP for statement, so it will be | |
1357 | put into a gang-single region. */ | |
1358 | gimple_seq_add_stmt (&gang_single_seq, stmt); | |
1359 | /* Is this a simple assignment? We call it simple if it is an | |
1360 | assignment to an artificial local variable. This captures | |
1361 | Fortran loop setup code computing loop bounds and offsets. */ | |
1362 | bool is_simple_assignment | |
1363 | = (gimple_code (stmt) == GIMPLE_ASSIGN | |
1364 | && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL | |
1365 | && DECL_ARTIFICIAL (gimple_assign_lhs (stmt))); | |
1366 | if (!is_simple_assignment) | |
1367 | only_simple_assignments = false; | |
1368 | } | |
1369 | } | |
1370 | ||
1371 | /* If we did not emit a new region, and are not going to emit one now | |
1372 | (that is, the original region was empty), prepare to emit a dummy so as | |
1373 | to preserve the original construct, which other processing (at least | |
1374 | test cases) depend on. */ | |
1375 | if (region_body == NULL && gang_single_seq == NULL) | |
1376 | { | |
1377 | gimple *stmt = gimple_build_nop (); | |
1378 | gimple_set_location (stmt, loc); | |
1379 | gimple_seq_add_stmt (&gang_single_seq, stmt); | |
1380 | } | |
1381 | ||
1382 | /* Gather up any remaining gang-single statements. */ | |
1383 | if (gang_single_seq != NULL) | |
1384 | { | |
1385 | gimple *single_region | |
1386 | = make_region_seq (loc, gang_single_seq, | |
1387 | num_gangs_clause, | |
1388 | num_workers_clause, | |
1389 | vector_length_clause, | |
1390 | kernels_clauses); | |
1391 | gimple_seq_add_stmt (®ion_body, single_region); | |
1392 | } | |
1393 | ||
1394 | /* We want to launch these kernels asynchronously. If the original | |
1395 | kernels region had an async clause, this is done automatically because | |
1396 | that async clause was copied to the individual regions we created. | |
1397 | Otherwise, add an async clause to each newly created region, as well as | |
1398 | a wait directive at the end. */ | |
1399 | if (async_clause == NULL) | |
1400 | add_async_clauses_and_wait (loc, ®ion_body); | |
e52253bc JB |
1401 | else |
1402 | /* !!! If we have asynchronous parallel blocks inside a (synchronous) data | |
1403 | region, then target memory will get unmapped at the point the data | |
1404 | region ends, even if the inner asynchronous parallels have not yet | |
1405 | completed. For kernels marked "async", we might want to use "enter data | |
1406 | async(...)" and "exit data async(...)" instead, or asynchronous data | |
1407 | regions (see also <https://gcc.gnu.org/PR97390> | |
1408 | "[OpenACC] 'async' clause on 'data' construct", | |
1409 | which is to share the same implementation). | |
1410 | For now, insert a (synchronous) wait at the end of the block. */ | |
1411 | add_wait (loc, ®ion_body); | |
e898ce79 GB |
1412 | |
1413 | tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body)); | |
1414 | gimple *body = gimple_build_bind (kernels_locals, region_body, | |
1415 | make_node (BLOCK)); | |
1416 | ||
1417 | /* If we found variables declared in nested scopes, build a data region to | |
1418 | map them to the device. */ | |
1419 | body = maybe_build_inner_data_region (loc, body, inner_bind_vars, | |
1420 | inner_cleanup); | |
1421 | ||
1422 | return body; | |
1423 | } | |
1424 | ||
1425 | /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct | |
1426 | containing the original OpenACC 'kernels' construct's region cut up into a | |
1427 | sequence of compute constructs. */ | |
1428 | ||
1429 | static gimple * | |
1430 | omp_oacc_kernels_decompose_1 (gimple *kernels_stmt) | |
1431 | { | |
1432 | gcc_checking_assert (gimple_omp_target_kind (kernels_stmt) | |
1433 | == GF_OMP_TARGET_KIND_OACC_KERNELS); | |
1434 | location_t loc = gimple_location (kernels_stmt); | |
1435 | ||
1436 | /* Collect the data clauses of the OpenACC 'kernels' directive and create a | |
1437 | new OpenACC 'data' construct with those clauses. */ | |
1438 | tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt); | |
1439 | tree data_clauses = NULL; | |
1440 | for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
1441 | { | |
1442 | /* Certain clauses are copied to the enclosing OpenACC 'data'. Other | |
1443 | clauses remain on the OpenACC 'kernels'. */ | |
1444 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) | |
1445 | { | |
1446 | tree decl = OMP_CLAUSE_DECL (c); | |
1447 | HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c); | |
1448 | switch (map_kind) | |
1449 | { | |
1450 | default: | |
1451 | if (map_kind == GOMP_MAP_ALLOC | |
1452 | && integer_zerop (OMP_CLAUSE_SIZE (c))) | |
1453 | /* ??? This is an alloc clause for mapping a pointer whose | |
1454 | target is already mapped. We leave these on the inner | |
1455 | compute constructs because moving them to the outer data | |
1456 | region causes runtime errors. */ | |
1457 | break; | |
1458 | ||
1459 | /* For non-artificial variables, and for non-declaration | |
1460 | expressions like A[0:n], copy the clause to the data | |
1461 | region. */ | |
1462 | if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl)) | |
1463 | || !DECL_P (decl)) | |
1464 | { | |
1465 | tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c), | |
1466 | OMP_CLAUSE_MAP); | |
1467 | OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind); | |
1468 | /* This must be unshared here to avoid "incorrect sharing | |
1469 | of tree nodes" errors from verify_gimple. */ | |
1470 | OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl); | |
1471 | OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c); | |
1472 | OMP_CLAUSE_CHAIN (new_clause) = data_clauses; | |
1473 | data_clauses = new_clause; | |
1474 | ||
1475 | /* Now that this data is mapped, turn the data clause on the | |
1476 | inner OpenACC 'kernels' into a 'present' clause. */ | |
1477 | OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT); | |
337ed336 TS |
1478 | |
1479 | /* See <https://gcc.gnu.org/PR100280>, | |
1480 | <https://gcc.gnu.org/PR104086>. */ | |
1481 | if (DECL_P (decl) | |
1482 | && !TREE_ADDRESSABLE (decl)) | |
1483 | { | |
1484 | /* Request that OMP lowering make 'decl' addressable. */ | |
1485 | OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; | |
1486 | ||
1487 | if (dump_enabled_p ()) | |
1488 | { | |
1489 | location_t loc = OMP_CLAUSE_LOCATION (new_clause); | |
1490 | const dump_user_location_t d_u_loc | |
1491 | = dump_user_location_t::from_location_t (loc); | |
1492 | /* PR100695 "Format decoder, quoting in 'dump_printf' | |
1493 | etc." */ | |
1494 | #if __GNUC__ >= 10 | |
1495 | # pragma GCC diagnostic push | |
1496 | # pragma GCC diagnostic ignored "-Wformat" | |
1497 | #endif | |
1498 | dump_printf_loc | |
1499 | (MSG_NOTE, d_u_loc, | |
1500 | "OpenACC %<kernels%> decomposition:" | |
1501 | " variable %<%T%> in %qs clause" | |
1502 | " requested to be made addressable\n", | |
1503 | decl, | |
1504 | user_omp_clause_code_name (new_clause, true)); | |
1505 | #if __GNUC__ >= 10 | |
1506 | # pragma GCC diagnostic pop | |
1507 | #endif | |
1508 | } | |
1509 | } | |
e898ce79 GB |
1510 | } |
1511 | break; | |
1512 | ||
1513 | case GOMP_MAP_POINTER: | |
1514 | case GOMP_MAP_TO_PSET: | |
e898ce79 GB |
1515 | case GOMP_MAP_FIRSTPRIVATE_POINTER: |
1516 | case GOMP_MAP_FIRSTPRIVATE_REFERENCE: | |
1517 | /* ??? Copying these map kinds leads to internal compiler | |
1518 | errors in later passes. */ | |
1519 | break; | |
1520 | } | |
1521 | } | |
7b2ae64b TS |
1522 | else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF |
1523 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SELF) | |
e898ce79 | 1524 | { |
7b2ae64b TS |
1525 | /* If there is an 'if' or 'self' clause, it must be duplicated to the |
1526 | enclosing data region. Temporarily remove its chain to avoid | |
1527 | copying it. */ | |
e898ce79 GB |
1528 | tree saved_chain = OMP_CLAUSE_CHAIN (c); |
1529 | OMP_CLAUSE_CHAIN (c) = NULL; | |
7b2ae64b | 1530 | tree new_clause = unshare_expr (c); |
e898ce79 | 1531 | OMP_CLAUSE_CHAIN (c) = saved_chain; |
7b2ae64b TS |
1532 | OMP_CLAUSE_CHAIN (new_clause) = data_clauses; |
1533 | data_clauses = new_clause; | |
e898ce79 GB |
1534 | } |
1535 | } | |
1536 | /* Restore the original order of the clauses. */ | |
1537 | data_clauses = nreverse (data_clauses); | |
1538 | ||
1539 | gimple *data_region | |
1540 | = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, | |
1541 | data_clauses); | |
1542 | gimple_set_location (data_region, loc); | |
1543 | ||
1544 | /* Transform the body of the kernels region into a sequence of compute | |
1545 | constructs. */ | |
1546 | gimple *body = decompose_kernels_region_body (kernels_stmt, | |
1547 | kernels_clauses); | |
1548 | ||
1549 | /* Put the transformed pieces together. The entire body of the region is | |
1550 | wrapped in a try-finally statement that calls __builtin_GOACC_data_end | |
1551 | for cleanup. */ | |
1552 | gimple *try_stmt = make_data_region_try_statement (loc, body); | |
1553 | gimple_omp_set_body (data_region, try_stmt); | |
1554 | ||
1555 | return data_region; | |
1556 | } | |
1557 | ||
1558 | ||
1559 | /* Decompose OpenACC 'kernels' constructs in the current function. */ | |
1560 | ||
1561 | static tree | |
1562 | omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p, | |
1563 | bool *handled_ops_p, | |
1564 | struct walk_stmt_info *) | |
1565 | { | |
1566 | gimple *stmt = gsi_stmt (*gsi_p); | |
1567 | ||
1568 | if ((gimple_code (stmt) == GIMPLE_OMP_TARGET) | |
1569 | && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) | |
1570 | { | |
1571 | gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt); | |
1572 | gsi_replace (gsi_p, stmt_new, false); | |
1573 | *handled_ops_p = true; | |
1574 | } | |
1575 | else | |
1576 | *handled_ops_p = false; | |
1577 | ||
1578 | return NULL; | |
1579 | } | |
1580 | ||
1581 | static unsigned int | |
1582 | omp_oacc_kernels_decompose (void) | |
1583 | { | |
1584 | gimple_seq body = gimple_body (current_function_decl); | |
1585 | ||
1586 | struct walk_stmt_info wi; | |
1587 | memset (&wi, 0, sizeof (wi)); | |
1588 | walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL, | |
1589 | &wi); | |
1590 | ||
1591 | gimple_set_body (current_function_decl, body); | |
1592 | ||
1593 | return 0; | |
1594 | } | |
1595 | ||
1596 | ||
1597 | namespace { | |
1598 | ||
1599 | const pass_data pass_data_omp_oacc_kernels_decompose = | |
1600 | { | |
1601 | GIMPLE_PASS, /* type */ | |
1602 | "omp_oacc_kernels_decompose", /* name */ | |
1603 | OPTGROUP_OMP, /* optinfo_flags */ | |
1604 | TV_NONE, /* tv_id */ | |
1605 | PROP_gimple_any, /* properties_required */ | |
1606 | 0, /* properties_provided */ | |
1607 | 0, /* properties_destroyed */ | |
1608 | 0, /* todo_flags_start */ | |
1609 | 0, /* todo_flags_finish */ | |
1610 | }; | |
1611 | ||
1612 | class pass_omp_oacc_kernels_decompose : public gimple_opt_pass | |
1613 | { | |
1614 | public: | |
1615 | pass_omp_oacc_kernels_decompose (gcc::context *ctxt) | |
1616 | : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt) | |
1617 | {} | |
1618 | ||
1619 | /* opt_pass methods: */ | |
725793af | 1620 | bool gate (function *) final override |
e898ce79 GB |
1621 | { |
1622 | return (flag_openacc | |
3395dfc4 | 1623 | && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE); |
e898ce79 | 1624 | } |
725793af | 1625 | unsigned int execute (function *) final override |
e898ce79 GB |
1626 | { |
1627 | return omp_oacc_kernels_decompose (); | |
1628 | } | |
1629 | ||
1630 | }; // class pass_omp_oacc_kernels_decompose | |
1631 | ||
1632 | } // anon namespace | |
1633 | ||
1634 | gimple_opt_pass * | |
1635 | make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt) | |
1636 | { | |
1637 | return new pass_omp_oacc_kernels_decompose (ctxt); | |
1638 | } |