]>
Commit | Line | Data |
---|---|---|
e2a58ed6 JB |
1 | /* OpenACC worker partitioning via middle end neutering/broadcasting scheme |
2 | ||
aeee4812 | 3 | Copyright (C) 2015-2023 Free Software Foundation, Inc. |
e2a58ed6 JB |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published | |
9 | by the Free Software Foundation; either version 3, or (at your | |
10 | option) any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "gimple.h" | |
28 | #include "tree-pass.h" | |
29 | #include "ssa.h" | |
30 | #include "cgraph.h" | |
31 | #include "pretty-print.h" | |
32 | #include "fold-const.h" | |
33 | #include "gimplify.h" | |
34 | #include "gimple-iterator.h" | |
35 | #include "gimple-walk.h" | |
36 | #include "tree-inline.h" | |
37 | #include "langhooks.h" | |
38 | #include "omp-general.h" | |
39 | #include "omp-low.h" | |
40 | #include "gimple-pretty-print.h" | |
41 | #include "cfghooks.h" | |
42 | #include "insn-config.h" | |
43 | #include "recog.h" | |
44 | #include "internal-fn.h" | |
45 | #include "bitmap.h" | |
46 | #include "tree-nested.h" | |
47 | #include "stor-layout.h" | |
48 | #include "tree-ssa-threadupdate.h" | |
49 | #include "tree-into-ssa.h" | |
50 | #include "splay-tree.h" | |
51 | #include "target.h" | |
52 | #include "cfgloop.h" | |
53 | #include "tree-cfg.h" | |
54 | #include "omp-offload.h" | |
55 | #include "attribs.h" | |
2a3f9f65 JB |
56 | #include "targhooks.h" |
57 | #include "diagnostic-core.h" | |
e2a58ed6 JB |
58 | |
59 | /* Loop structure of the function. The entire function is described as | |
60 | a NULL loop. */ | |
e53b6e56 | 61 | /* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'. */ |
e2a58ed6 JB |
62 | |
63 | struct parallel_g | |
64 | { | |
65 | /* Parent parallel. */ | |
66 | parallel_g *parent; | |
67 | ||
68 | /* Next sibling parallel. */ | |
69 | parallel_g *next; | |
70 | ||
71 | /* First child parallel. */ | |
72 | parallel_g *inner; | |
73 | ||
74 | /* Partitioning mask of the parallel. */ | |
75 | unsigned mask; | |
76 | ||
77 | /* Partitioning used within inner parallels. */ | |
78 | unsigned inner_mask; | |
79 | ||
80 | /* Location of parallel forked and join. The forked is the first | |
81 | block in the parallel and the join is the first block after of | |
82 | the partition. */ | |
83 | basic_block forked_block; | |
84 | basic_block join_block; | |
85 | ||
86 | gimple *forked_stmt; | |
87 | gimple *join_stmt; | |
88 | ||
89 | gimple *fork_stmt; | |
90 | gimple *joining_stmt; | |
91 | ||
92 | /* Basic blocks in this parallel, but not in child parallels. The | |
93 | FORKED and JOINING blocks are in the partition. The FORK and JOIN | |
94 | blocks are not. */ | |
95 | auto_vec<basic_block> blocks; | |
96 | ||
97 | tree record_type; | |
98 | tree sender_decl; | |
99 | tree receiver_decl; | |
100 | ||
101 | public: | |
102 | parallel_g (parallel_g *parent, unsigned mode); | |
103 | ~parallel_g (); | |
104 | }; | |
105 | ||
106 | /* Constructor links the new parallel into it's parent's chain of | |
107 | children. */ | |
108 | ||
109 | parallel_g::parallel_g (parallel_g *parent_, unsigned mask_) | |
110 | :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0) | |
111 | { | |
112 | forked_block = join_block = 0; | |
113 | forked_stmt = join_stmt = NULL; | |
114 | fork_stmt = joining_stmt = NULL; | |
115 | ||
116 | record_type = NULL_TREE; | |
117 | sender_decl = NULL_TREE; | |
118 | receiver_decl = NULL_TREE; | |
119 | ||
120 | if (parent) | |
121 | { | |
122 | next = parent->inner; | |
123 | parent->inner = this; | |
124 | } | |
125 | } | |
126 | ||
127 | parallel_g::~parallel_g () | |
128 | { | |
129 | delete inner; | |
130 | delete next; | |
131 | } | |
132 | ||
133 | static bool | |
134 | local_var_based_p (tree decl) | |
135 | { | |
136 | switch (TREE_CODE (decl)) | |
137 | { | |
138 | case VAR_DECL: | |
139 | return !is_global_var (decl); | |
140 | ||
141 | case COMPONENT_REF: | |
142 | case BIT_FIELD_REF: | |
143 | case ARRAY_REF: | |
144 | return local_var_based_p (TREE_OPERAND (decl, 0)); | |
145 | ||
146 | default: | |
147 | return false; | |
148 | } | |
149 | } | |
150 | ||
151 | /* Map of basic blocks to gimple stmts. */ | |
152 | typedef hash_map<basic_block, gimple *> bb_stmt_map_t; | |
153 | ||
154 | /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since | |
155 | the routine likely contains partitioned loops (else will do its own | |
156 | neutering and variable propagation). Return TRUE if a function call CALL | |
157 | should be made in (worker) single mode instead, rather than redundant | |
158 | mode. */ | |
159 | ||
160 | static bool | |
161 | omp_sese_active_worker_call (gcall *call) | |
162 | { | |
163 | #define GOMP_DIM_SEQ GOMP_DIM_MAX | |
164 | tree fndecl = gimple_call_fndecl (call); | |
165 | ||
166 | if (!fndecl) | |
167 | return true; | |
168 | ||
169 | tree attrs = oacc_get_fn_attrib (fndecl); | |
170 | ||
171 | if (!attrs) | |
172 | return true; | |
173 | ||
174 | int level = oacc_fn_attrib_level (attrs); | |
175 | ||
176 | /* Neither regular functions nor "seq" routines should be run by all threads | |
177 | in worker-single mode. */ | |
178 | return level == -1 || level == GOMP_DIM_SEQ; | |
179 | #undef GOMP_DIM_SEQ | |
180 | } | |
181 | ||
182 | /* Split basic blocks such that each forked and join unspecs are at | |
183 | the start of their basic blocks. Thus afterwards each block will | |
184 | have a single partitioning mode. We also do the same for return | |
185 | insns, as they are executed by every thread. Return the | |
186 | partitioning mode of the function as a whole. Populate MAP with | |
187 | head and tail blocks. We also clear the BB visited flag, which is | |
188 | used when finding partitions. */ | |
e53b6e56 | 189 | /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'. */ |
e2a58ed6 JB |
190 | |
191 | static void | |
192 | omp_sese_split_blocks (bb_stmt_map_t *map) | |
193 | { | |
194 | auto_vec<gimple *> worklist; | |
195 | basic_block block; | |
196 | ||
197 | /* Locate all the reorg instructions of interest. */ | |
198 | FOR_ALL_BB_FN (block, cfun) | |
199 | { | |
200 | /* Clear visited flag, for use by parallel locator */ | |
201 | block->flags &= ~BB_VISITED; | |
202 | ||
203 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
204 | !gsi_end_p (gsi); | |
205 | gsi_next (&gsi)) | |
206 | { | |
207 | gimple *stmt = gsi_stmt (gsi); | |
208 | ||
209 | if (gimple_call_internal_p (stmt, IFN_UNIQUE)) | |
210 | { | |
211 | enum ifn_unique_kind k = ((enum ifn_unique_kind) | |
212 | TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); | |
213 | ||
214 | if (k == IFN_UNIQUE_OACC_JOIN) | |
215 | worklist.safe_push (stmt); | |
216 | else if (k == IFN_UNIQUE_OACC_FORK) | |
217 | { | |
218 | gcc_assert (gsi_one_before_end_p (gsi)); | |
219 | basic_block forked_block = single_succ (block); | |
220 | gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block); | |
221 | ||
222 | /* We push a NOP as a placeholder for the "forked" stmt. | |
223 | This is then recognized in omp_sese_find_par. */ | |
224 | gimple *nop = gimple_build_nop (); | |
225 | gsi_insert_before (&gsi2, nop, GSI_SAME_STMT); | |
226 | ||
227 | worklist.safe_push (nop); | |
228 | } | |
229 | } | |
230 | else if (gimple_code (stmt) == GIMPLE_RETURN | |
231 | || gimple_code (stmt) == GIMPLE_COND | |
232 | || gimple_code (stmt) == GIMPLE_SWITCH | |
233 | || (gimple_code (stmt) == GIMPLE_CALL | |
234 | && !gimple_call_internal_p (stmt) | |
235 | && !omp_sese_active_worker_call (as_a <gcall *> (stmt)))) | |
236 | worklist.safe_push (stmt); | |
237 | else if (is_gimple_assign (stmt)) | |
238 | { | |
239 | tree lhs = gimple_assign_lhs (stmt); | |
240 | ||
241 | /* Force assignments to components/fields/elements of local | |
242 | aggregates into fully-partitioned (redundant) mode. This | |
243 | avoids having to broadcast the whole aggregate. The RHS of | |
244 | the assignment will be propagated using the normal | |
245 | mechanism. */ | |
246 | ||
247 | switch (TREE_CODE (lhs)) | |
248 | { | |
249 | case COMPONENT_REF: | |
250 | case BIT_FIELD_REF: | |
251 | case ARRAY_REF: | |
252 | { | |
253 | tree aggr = TREE_OPERAND (lhs, 0); | |
254 | ||
255 | if (local_var_based_p (aggr)) | |
256 | worklist.safe_push (stmt); | |
257 | } | |
258 | break; | |
259 | ||
260 | default: | |
261 | ; | |
262 | } | |
263 | } | |
264 | } | |
265 | } | |
266 | ||
267 | /* Split blocks on the worklist. */ | |
268 | unsigned ix; | |
269 | gimple *stmt; | |
270 | ||
271 | for (ix = 0; worklist.iterate (ix, &stmt); ix++) | |
272 | { | |
273 | basic_block block = gimple_bb (stmt); | |
274 | ||
275 | if (gimple_code (stmt) == GIMPLE_COND) | |
276 | { | |
277 | gcond *orig_cond = as_a <gcond *> (stmt); | |
278 | tree_code code = gimple_expr_code (orig_cond); | |
279 | tree pred = make_ssa_name (boolean_type_node); | |
280 | gimple *asgn = gimple_build_assign (pred, code, | |
281 | gimple_cond_lhs (orig_cond), | |
282 | gimple_cond_rhs (orig_cond)); | |
283 | gcond *new_cond | |
284 | = gimple_build_cond (NE_EXPR, pred, boolean_false_node, | |
285 | gimple_cond_true_label (orig_cond), | |
286 | gimple_cond_false_label (orig_cond)); | |
287 | ||
288 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
289 | gsi_insert_before (&gsi, asgn, GSI_SAME_STMT); | |
290 | gsi_replace (&gsi, new_cond, true); | |
291 | ||
292 | edge e = split_block (block, asgn); | |
293 | block = e->dest; | |
294 | map->get_or_insert (block) = new_cond; | |
295 | } | |
296 | else if ((gimple_code (stmt) == GIMPLE_CALL | |
297 | && !gimple_call_internal_p (stmt)) | |
298 | || is_gimple_assign (stmt)) | |
299 | { | |
300 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
301 | gsi_prev (&gsi); | |
302 | ||
303 | edge call = split_block (block, gsi_stmt (gsi)); | |
304 | ||
305 | gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest)); | |
306 | ||
307 | edge call_to_ret = split_block (call->dest, call_stmt); | |
308 | ||
309 | map->get_or_insert (call_to_ret->src) = call_stmt; | |
310 | } | |
311 | else | |
312 | { | |
313 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
314 | gsi_prev (&gsi); | |
315 | ||
316 | if (gsi_end_p (gsi)) | |
317 | map->get_or_insert (block) = stmt; | |
318 | else | |
319 | { | |
320 | /* Split block before insn. The insn is in the new block. */ | |
321 | edge e = split_block (block, gsi_stmt (gsi)); | |
322 | ||
323 | block = e->dest; | |
324 | map->get_or_insert (block) = stmt; | |
325 | } | |
326 | } | |
327 | } | |
328 | } | |
329 | ||
330 | static const char * | |
331 | mask_name (unsigned mask) | |
332 | { | |
333 | switch (mask) | |
334 | { | |
335 | case 0: return "gang redundant"; | |
336 | case 1: return "gang partitioned"; | |
337 | case 2: return "worker partitioned"; | |
338 | case 3: return "gang+worker partitioned"; | |
339 | case 4: return "vector partitioned"; | |
340 | case 5: return "gang+vector partitioned"; | |
341 | case 6: return "worker+vector partitioned"; | |
342 | case 7: return "fully partitioned"; | |
343 | default: return "<illegal>"; | |
344 | } | |
345 | } | |
346 | ||
347 | /* Dump this parallel and all its inner parallels. */ | |
e53b6e56 | 348 | /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'. */ |
e2a58ed6 JB |
349 | |
350 | static void | |
351 | omp_sese_dump_pars (parallel_g *par, unsigned depth) | |
352 | { | |
353 | fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n", | |
354 | depth, par->mask, mask_name (par->mask), | |
355 | par->forked_block ? par->forked_block->index : -1, | |
356 | par->join_block ? par->join_block->index : -1); | |
357 | ||
358 | fprintf (dump_file, " blocks:"); | |
359 | ||
360 | basic_block block; | |
361 | for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++) | |
362 | fprintf (dump_file, " %d", block->index); | |
363 | fprintf (dump_file, "\n"); | |
364 | if (par->inner) | |
365 | omp_sese_dump_pars (par->inner, depth + 1); | |
366 | ||
367 | if (par->next) | |
368 | omp_sese_dump_pars (par->next, depth); | |
369 | } | |
370 | ||
371 | /* If BLOCK contains a fork/join marker, process it to create or | |
372 | terminate a loop structure. Add this block to the current loop, | |
373 | and then walk successor blocks. */ | |
e53b6e56 | 374 | /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'. */ |
e2a58ed6 JB |
375 | |
376 | static parallel_g * | |
377 | omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block) | |
378 | { | |
379 | if (block->flags & BB_VISITED) | |
380 | return par; | |
381 | block->flags |= BB_VISITED; | |
382 | ||
383 | if (gimple **stmtp = map->get (block)) | |
384 | { | |
385 | gimple *stmt = *stmtp; | |
386 | ||
387 | if (gimple_code (stmt) == GIMPLE_COND | |
388 | || gimple_code (stmt) == GIMPLE_SWITCH | |
389 | || gimple_code (stmt) == GIMPLE_RETURN | |
390 | || (gimple_code (stmt) == GIMPLE_CALL | |
391 | && !gimple_call_internal_p (stmt)) | |
392 | || is_gimple_assign (stmt)) | |
393 | { | |
394 | /* A single block that is forced to be at the maximum partition | |
395 | level. Make a singleton par for it. */ | |
396 | par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG) | |
397 | | GOMP_DIM_MASK (GOMP_DIM_WORKER) | |
398 | | GOMP_DIM_MASK (GOMP_DIM_VECTOR)); | |
399 | par->forked_block = block; | |
400 | par->forked_stmt = stmt; | |
401 | par->blocks.safe_push (block); | |
402 | par = par->parent; | |
403 | goto walk_successors; | |
404 | } | |
405 | else if (gimple_nop_p (stmt)) | |
406 | { | |
407 | basic_block pred = single_pred (block); | |
408 | gcc_assert (pred); | |
409 | gimple_stmt_iterator gsi = gsi_last_bb (pred); | |
410 | gimple *final_stmt = gsi_stmt (gsi); | |
411 | ||
412 | if (gimple_call_internal_p (final_stmt, IFN_UNIQUE)) | |
413 | { | |
414 | gcall *call = as_a <gcall *> (final_stmt); | |
415 | enum ifn_unique_kind k = ((enum ifn_unique_kind) | |
416 | TREE_INT_CST_LOW (gimple_call_arg (call, 0))); | |
417 | ||
418 | if (k == IFN_UNIQUE_OACC_FORK) | |
419 | { | |
420 | HOST_WIDE_INT dim | |
421 | = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); | |
422 | unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0; | |
423 | ||
424 | par = new parallel_g (par, mask); | |
425 | par->forked_block = block; | |
426 | par->forked_stmt = final_stmt; | |
427 | par->fork_stmt = stmt; | |
428 | } | |
429 | else | |
430 | gcc_unreachable (); | |
431 | } | |
432 | else | |
433 | gcc_unreachable (); | |
434 | } | |
435 | else if (gimple_call_internal_p (stmt, IFN_UNIQUE)) | |
436 | { | |
437 | gcall *call = as_a <gcall *> (stmt); | |
438 | enum ifn_unique_kind k = ((enum ifn_unique_kind) | |
439 | TREE_INT_CST_LOW (gimple_call_arg (call, 0))); | |
440 | if (k == IFN_UNIQUE_OACC_JOIN) | |
441 | { | |
442 | HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); | |
443 | unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0; | |
444 | ||
445 | gcc_assert (par->mask == mask); | |
446 | par->join_block = block; | |
447 | par->join_stmt = stmt; | |
448 | par = par->parent; | |
449 | } | |
450 | else | |
451 | gcc_unreachable (); | |
452 | } | |
453 | else | |
454 | gcc_unreachable (); | |
455 | } | |
456 | ||
457 | if (par) | |
458 | /* Add this block onto the current loop's list of blocks. */ | |
459 | par->blocks.safe_push (block); | |
460 | else | |
461 | /* This must be the entry block. Create a NULL parallel. */ | |
462 | par = new parallel_g (0, 0); | |
463 | ||
464 | walk_successors: | |
465 | /* Walk successor blocks. */ | |
466 | edge e; | |
467 | edge_iterator ei; | |
468 | ||
469 | FOR_EACH_EDGE (e, ei, block->succs) | |
470 | omp_sese_find_par (map, par, e->dest); | |
471 | ||
472 | return par; | |
473 | } | |
474 | ||
475 | /* DFS walk the CFG looking for fork & join markers. Construct | |
476 | loop structures as we go. MAP is a mapping of basic blocks | |
477 | to head & tail markers, discovered when splitting blocks. This | |
478 | speeds up the discovery. We rely on the BB visited flag having | |
479 | been cleared when splitting blocks. */ | |
e53b6e56 | 480 | /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'. */ |
e2a58ed6 JB |
481 | |
482 | static parallel_g * | |
483 | omp_sese_discover_pars (bb_stmt_map_t *map) | |
484 | { | |
485 | basic_block block; | |
486 | ||
487 | /* Mark exit blocks as visited. */ | |
488 | block = EXIT_BLOCK_PTR_FOR_FN (cfun); | |
489 | block->flags |= BB_VISITED; | |
490 | ||
491 | /* And entry block as not. */ | |
492 | block = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
493 | block->flags &= ~BB_VISITED; | |
494 | ||
495 | parallel_g *par = omp_sese_find_par (map, 0, block); | |
496 | ||
497 | if (dump_file) | |
498 | { | |
499 | fprintf (dump_file, "\nLoops\n"); | |
500 | omp_sese_dump_pars (par, 0); | |
501 | fprintf (dump_file, "\n"); | |
502 | } | |
503 | ||
504 | return par; | |
505 | } | |
506 | ||
507 | static void | |
508 | populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single, | |
509 | bitmap vector_single, unsigned outer_mask, | |
510 | int depth) | |
511 | { | |
512 | unsigned mask = outer_mask | par->mask; | |
513 | ||
514 | basic_block block; | |
515 | ||
516 | for (unsigned i = 0; par->blocks.iterate (i, &block); i++) | |
517 | { | |
518 | if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) | |
519 | bitmap_set_bit (worker_single, block->index); | |
520 | ||
521 | if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0) | |
522 | bitmap_set_bit (vector_single, block->index); | |
523 | } | |
524 | ||
525 | if (par->inner) | |
526 | populate_single_mode_bitmaps (par->inner, worker_single, vector_single, | |
527 | mask, depth + 1); | |
528 | if (par->next) | |
529 | populate_single_mode_bitmaps (par->next, worker_single, vector_single, | |
530 | outer_mask, depth); | |
531 | } | |
532 | ||
533 | /* A map from SSA names or var decls to record fields. */ | |
534 | ||
535 | typedef hash_map<tree, tree> field_map_t; | |
536 | ||
537 | /* For each propagation record type, this is a map from SSA names or var decls | |
538 | to propagate, to the field in the record type that should be used for | |
539 | transmission and reception. */ | |
540 | ||
0fe9176f | 541 | typedef hash_map<tree, field_map_t> record_field_map_t; |
e2a58ed6 | 542 | |
e2a58ed6 | 543 | static void |
049eda82 | 544 | install_var_field (tree var, tree record_type, field_map_t *fields) |
e2a58ed6 | 545 | { |
e2a58ed6 JB |
546 | tree name; |
547 | char tmp[20]; | |
548 | ||
549 | if (TREE_CODE (var) == SSA_NAME) | |
550 | { | |
551 | name = SSA_NAME_IDENTIFIER (var); | |
552 | if (!name) | |
553 | { | |
554 | sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var)); | |
555 | name = get_identifier (tmp); | |
556 | } | |
557 | } | |
a64c2b0d | 558 | else if (VAR_P (var)) |
e2a58ed6 JB |
559 | { |
560 | name = DECL_NAME (var); | |
561 | if (!name) | |
562 | { | |
563 | sprintf (tmp, "D_%u", (unsigned) DECL_UID (var)); | |
564 | name = get_identifier (tmp); | |
565 | } | |
566 | } | |
567 | else | |
568 | gcc_unreachable (); | |
569 | ||
570 | gcc_assert (!fields->get (var)); | |
571 | ||
572 | tree type = TREE_TYPE (var); | |
573 | ||
574 | if (POINTER_TYPE_P (type) | |
575 | && TYPE_RESTRICT (type)) | |
576 | type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT); | |
577 | ||
578 | tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type); | |
579 | ||
a64c2b0d | 580 | if (VAR_P (var) && type == TREE_TYPE (var)) |
e2a58ed6 JB |
581 | { |
582 | SET_DECL_ALIGN (field, DECL_ALIGN (var)); | |
583 | DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var); | |
584 | TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var); | |
585 | } | |
586 | else | |
587 | SET_DECL_ALIGN (field, TYPE_ALIGN (type)); | |
588 | ||
589 | fields->put (var, field); | |
590 | ||
591 | insert_field_into_struct (record_type, field); | |
592 | } | |
593 | ||
594 | /* Sets of SSA_NAMES or VAR_DECLs to propagate. */ | |
595 | typedef hash_set<tree> propagation_set; | |
596 | ||
597 | static void | |
598 | find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask, | |
599 | bitmap worker_single, bitmap vector_single, | |
600 | vec<propagation_set *> *prop_set) | |
601 | { | |
602 | unsigned mask = outer_mask | par->mask; | |
603 | ||
604 | if (par->inner) | |
605 | find_ssa_names_to_propagate (par->inner, mask, worker_single, | |
606 | vector_single, prop_set); | |
607 | if (par->next) | |
608 | find_ssa_names_to_propagate (par->next, outer_mask, worker_single, | |
609 | vector_single, prop_set); | |
610 | ||
611 | if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) | |
612 | { | |
613 | basic_block block; | |
614 | int ix; | |
615 | ||
616 | for (ix = 0; par->blocks.iterate (ix, &block); ix++) | |
617 | { | |
618 | for (gphi_iterator psi = gsi_start_phis (block); | |
619 | !gsi_end_p (psi); gsi_next (&psi)) | |
620 | { | |
621 | gphi *phi = psi.phi (); | |
622 | use_operand_p use; | |
623 | ssa_op_iter iter; | |
624 | ||
625 | FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE) | |
626 | { | |
627 | tree var = USE_FROM_PTR (use); | |
628 | ||
629 | if (TREE_CODE (var) != SSA_NAME) | |
630 | continue; | |
631 | ||
632 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); | |
633 | ||
634 | if (gimple_nop_p (def_stmt)) | |
635 | continue; | |
636 | ||
637 | basic_block def_bb = gimple_bb (def_stmt); | |
638 | ||
639 | if (bitmap_bit_p (worker_single, def_bb->index)) | |
640 | { | |
641 | if (!(*prop_set)[def_bb->index]) | |
642 | (*prop_set)[def_bb->index] = new propagation_set; | |
643 | ||
644 | propagation_set *ws_prop = (*prop_set)[def_bb->index]; | |
645 | ||
646 | ws_prop->add (var); | |
647 | } | |
648 | } | |
649 | } | |
650 | ||
651 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
652 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
653 | { | |
654 | use_operand_p use; | |
655 | ssa_op_iter iter; | |
656 | gimple *stmt = gsi_stmt (gsi); | |
657 | ||
658 | FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) | |
659 | { | |
660 | tree var = USE_FROM_PTR (use); | |
661 | ||
662 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); | |
663 | ||
664 | if (gimple_nop_p (def_stmt)) | |
665 | continue; | |
666 | ||
667 | basic_block def_bb = gimple_bb (def_stmt); | |
668 | ||
669 | if (bitmap_bit_p (worker_single, def_bb->index)) | |
670 | { | |
671 | if (!(*prop_set)[def_bb->index]) | |
672 | (*prop_set)[def_bb->index] = new propagation_set; | |
673 | ||
674 | propagation_set *ws_prop = (*prop_set)[def_bb->index]; | |
675 | ||
676 | ws_prop->add (var); | |
677 | } | |
678 | } | |
679 | } | |
680 | } | |
681 | } | |
682 | } | |
683 | ||
684 | /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a | |
685 | statement. */ | |
686 | ||
687 | static tree | |
688 | find_partitioned_var_uses_1 (tree *node, int *, void *data) | |
689 | { | |
690 | walk_stmt_info *wi = (walk_stmt_info *) data; | |
691 | hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info; | |
692 | ||
693 | if (!wi->is_lhs && VAR_P (*node)) | |
694 | partitioned_var_uses->add (*node); | |
695 | ||
696 | return NULL_TREE; | |
697 | } | |
698 | ||
699 | static void | |
700 | find_partitioned_var_uses (parallel_g *par, unsigned outer_mask, | |
701 | hash_set<tree> *partitioned_var_uses) | |
702 | { | |
703 | unsigned mask = outer_mask | par->mask; | |
704 | ||
705 | if (par->inner) | |
706 | find_partitioned_var_uses (par->inner, mask, partitioned_var_uses); | |
707 | if (par->next) | |
708 | find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses); | |
709 | ||
710 | if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) | |
711 | { | |
712 | basic_block block; | |
713 | int ix; | |
714 | ||
715 | for (ix = 0; par->blocks.iterate (ix, &block); ix++) | |
716 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
717 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
718 | { | |
719 | walk_stmt_info wi; | |
720 | memset (&wi, 0, sizeof (wi)); | |
721 | wi.info = (void *) partitioned_var_uses; | |
722 | walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi); | |
723 | } | |
724 | } | |
725 | } | |
726 | ||
727 | /* Gang-private variables (typically placed in a GPU's shared memory) do not | |
728 | need to be processed by the worker-propagation mechanism. Populate the | |
729 | GANG_PRIVATE_VARS set with any such variables found in the current | |
730 | function. */ | |
731 | ||
732 | static void | |
733 | find_gang_private_vars (hash_set<tree> *gang_private_vars) | |
734 | { | |
735 | basic_block block; | |
736 | ||
737 | FOR_EACH_BB_FN (block, cfun) | |
738 | { | |
739 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
740 | !gsi_end_p (gsi); | |
741 | gsi_next (&gsi)) | |
742 | { | |
743 | gimple *stmt = gsi_stmt (gsi); | |
744 | ||
745 | if (gimple_call_internal_p (stmt, IFN_UNIQUE)) | |
746 | { | |
747 | enum ifn_unique_kind k = ((enum ifn_unique_kind) | |
748 | TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); | |
749 | if (k == IFN_UNIQUE_OACC_PRIVATE) | |
750 | { | |
751 | HOST_WIDE_INT level | |
752 | = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); | |
753 | if (level != GOMP_DIM_GANG) | |
754 | continue; | |
755 | for (unsigned i = 3; i < gimple_call_num_args (stmt); i++) | |
756 | { | |
757 | tree arg = gimple_call_arg (stmt, i); | |
758 | gcc_assert (TREE_CODE (arg) == ADDR_EXPR); | |
759 | tree decl = TREE_OPERAND (arg, 0); | |
760 | gang_private_vars->add (decl); | |
761 | } | |
762 | } | |
763 | } | |
764 | } | |
765 | } | |
766 | } | |
767 | ||
768 | static void | |
769 | find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask, | |
770 | hash_set<tree> *partitioned_var_uses, | |
771 | hash_set<tree> *gang_private_vars, | |
2961ac45 | 772 | bitmap writes_gang_private, |
e2a58ed6 JB |
773 | vec<propagation_set *> *prop_set) |
774 | { | |
775 | unsigned mask = outer_mask | par->mask; | |
776 | ||
777 | if (par->inner) | |
778 | find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses, | |
2961ac45 JB |
779 | gang_private_vars, writes_gang_private, |
780 | prop_set); | |
e2a58ed6 JB |
781 | if (par->next) |
782 | find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses, | |
2961ac45 JB |
783 | gang_private_vars, writes_gang_private, |
784 | prop_set); | |
e2a58ed6 JB |
785 | |
786 | if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))) | |
787 | { | |
788 | basic_block block; | |
789 | int ix; | |
790 | ||
791 | for (ix = 0; par->blocks.iterate (ix, &block); ix++) | |
792 | { | |
793 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
794 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
795 | { | |
796 | gimple *stmt = gsi_stmt (gsi); | |
797 | tree var; | |
798 | unsigned i; | |
799 | ||
800 | FOR_EACH_LOCAL_DECL (cfun, i, var) | |
801 | { | |
802 | if (!VAR_P (var) | |
803 | || is_global_var (var) | |
804 | || AGGREGATE_TYPE_P (TREE_TYPE (var)) | |
2961ac45 | 805 | || !partitioned_var_uses->contains (var)) |
e2a58ed6 JB |
806 | continue; |
807 | ||
808 | if (stmt_may_clobber_ref_p (stmt, var)) | |
809 | { | |
810 | if (dump_file) | |
811 | { | |
812 | fprintf (dump_file, "bb %u: local variable may be " | |
813 | "clobbered in %s mode: ", block->index, | |
814 | mask_name (mask)); | |
815 | print_generic_expr (dump_file, var, TDF_SLIM); | |
816 | fprintf (dump_file, "\n"); | |
817 | } | |
818 | ||
2961ac45 JB |
819 | if (gang_private_vars->contains (var)) |
820 | { | |
821 | /* If we write a gang-private variable, we want a | |
822 | barrier at the end of the block. */ | |
823 | bitmap_set_bit (writes_gang_private, block->index); | |
824 | continue; | |
825 | } | |
826 | ||
e2a58ed6 JB |
827 | if (!(*prop_set)[block->index]) |
828 | (*prop_set)[block->index] = new propagation_set; | |
829 | ||
830 | propagation_set *ws_prop | |
831 | = (*prop_set)[block->index]; | |
832 | ||
833 | ws_prop->add (var); | |
834 | } | |
835 | } | |
836 | } | |
837 | } | |
838 | } | |
839 | } | |
840 | ||
841 | /* Transform basic blocks FROM, TO (which may be the same block) into: | |
842 | if (GOACC_single_start ()) | |
843 | BLOCK; | |
844 | GOACC_barrier (); | |
845 | \ | / | |
846 | +----+ | |
847 | | | (new) predicate block | |
848 | +----+-- | |
849 | \ | / \ | / |t \ | |
850 | +----+ +----+ +----+ | | |
851 | | | | | ===> | | | f (old) from block | |
852 | +----+ +----+ +----+ | | |
853 | | t/ \f | / | |
854 | +----+/ | |
855 | (split (split before | | skip block | |
856 | at end) condition) +----+ | |
857 | t/ \f | |
858 | */ | |
859 | ||
860 | static void | |
861 | worker_single_simple (basic_block from, basic_block to, | |
862 | hash_set<tree> *def_escapes_block) | |
863 | { | |
864 | gimple *call, *cond; | |
865 | tree lhs, decl; | |
866 | basic_block skip_block; | |
867 | ||
868 | gimple_stmt_iterator gsi = gsi_last_bb (to); | |
869 | if (EDGE_COUNT (to->succs) > 1) | |
870 | { | |
871 | gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND); | |
872 | gsi_prev (&gsi); | |
873 | } | |
874 | edge e = split_block (to, gsi_stmt (gsi)); | |
875 | skip_block = e->dest; | |
876 | ||
877 | gimple_stmt_iterator start = gsi_after_labels (from); | |
878 | ||
879 | decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START); | |
880 | lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); | |
881 | call = gimple_build_call (decl, 0); | |
882 | gimple_call_set_lhs (call, lhs); | |
883 | gsi_insert_before (&start, call, GSI_NEW_STMT); | |
884 | update_stmt (call); | |
885 | ||
886 | cond = gimple_build_cond (EQ_EXPR, lhs, | |
887 | fold_convert_loc (UNKNOWN_LOCATION, | |
888 | TREE_TYPE (lhs), | |
889 | boolean_true_node), | |
890 | NULL_TREE, NULL_TREE); | |
891 | gsi_insert_after (&start, cond, GSI_NEW_STMT); | |
892 | update_stmt (cond); | |
893 | ||
894 | edge et = split_block (from, cond); | |
895 | et->flags &= ~EDGE_FALLTHRU; | |
896 | et->flags |= EDGE_TRUE_VALUE; | |
897 | /* Make the active worker the more probable path so we prefer fallthrough | |
898 | (letting the idle workers jump around more). */ | |
899 | et->probability = profile_probability::likely (); | |
900 | ||
901 | edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE); | |
902 | ef->probability = et->probability.invert (); | |
903 | ||
904 | basic_block neutered = split_edge (ef); | |
905 | gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered); | |
906 | ||
907 | for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
908 | { | |
909 | gimple *stmt = gsi_stmt (gsi); | |
910 | ssa_op_iter iter; | |
911 | tree var; | |
912 | ||
913 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) | |
914 | { | |
915 | if (def_escapes_block->contains (var)) | |
916 | { | |
917 | gphi *join_phi = create_phi_node (NULL_TREE, skip_block); | |
918 | create_new_def_for (var, join_phi, | |
919 | gimple_phi_result_ptr (join_phi)); | |
920 | add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION); | |
921 | ||
922 | tree neutered_def = copy_ssa_name (var, NULL); | |
923 | /* We really want "don't care" or some value representing | |
924 | undefined here, but optimizers will probably get rid of the | |
925 | zero-assignments anyway. */ | |
926 | gassign *zero = gimple_build_assign (neutered_def, | |
927 | build_zero_cst (TREE_TYPE (neutered_def))); | |
928 | ||
929 | gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING); | |
930 | update_stmt (zero); | |
931 | ||
932 | add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered), | |
933 | UNKNOWN_LOCATION); | |
934 | update_stmt (join_phi); | |
935 | } | |
936 | } | |
937 | } | |
e2a58ed6 JB |
938 | } |
939 | ||
e2a58ed6 | 940 | static tree |
049eda82 | 941 | build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields) |
e2a58ed6 | 942 | { |
e2a58ed6 JB |
943 | tree x = build_simple_mem_ref (receiver_decl); |
944 | tree field = *fields->get (var); | |
945 | TREE_THIS_NOTRAP (x) = 1; | |
54f74502 | 946 | x = omp_build_component_ref (x, field); |
e2a58ed6 JB |
947 | return x; |
948 | } | |
949 | ||
950 | static tree | |
049eda82 | 951 | build_sender_ref (tree var, tree sender_decl, field_map_t *fields) |
e2a58ed6 | 952 | { |
2a3f9f65 JB |
953 | if (POINTER_TYPE_P (TREE_TYPE (sender_decl))) |
954 | sender_decl = build_simple_mem_ref (sender_decl); | |
e2a58ed6 | 955 | tree field = *fields->get (var); |
54f74502 | 956 | return omp_build_component_ref (sender_decl, field); |
e2a58ed6 JB |
957 | } |
958 | ||
959 | static int | |
960 | sort_by_ssa_version_or_uid (const void *p1, const void *p2) | |
961 | { | |
962 | const tree t1 = *(const tree *)p1; | |
963 | const tree t2 = *(const tree *)p2; | |
964 | ||
965 | if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME) | |
966 | return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2); | |
967 | else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME) | |
968 | return -1; | |
969 | else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME) | |
970 | return 1; | |
971 | else | |
972 | return DECL_UID (t1) - DECL_UID (t2); | |
973 | } | |
974 | ||
975 | static int | |
976 | sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2) | |
977 | { | |
978 | const tree t1 = *(const tree *)p1; | |
979 | const tree t2 = *(const tree *)p2; | |
980 | unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1))); | |
981 | unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2))); | |
982 | if (s1 != s2) | |
983 | return s2 - s1; | |
984 | else | |
985 | return sort_by_ssa_version_or_uid (p1, p2); | |
986 | } | |
987 | ||
988 | static void | |
989 | worker_single_copy (basic_block from, basic_block to, | |
990 | hash_set<tree> *def_escapes_block, | |
991 | hash_set<tree> *worker_partitioned_uses, | |
2a3f9f65 JB |
992 | tree record_type, record_field_map_t *record_field_map, |
993 | unsigned HOST_WIDE_INT placement, | |
2961ac45 | 994 | bool isolate_broadcasts, bool has_gang_private_write) |
e2a58ed6 JB |
995 | { |
996 | /* If we only have virtual defs, we'll have no record type, but we still want | |
997 | to emit single_copy_start and (particularly) single_copy_end to act as | |
998 | a vdef source on the neutered edge representing memory writes on the | |
999 | non-neutered edge. */ | |
1000 | if (!record_type) | |
1001 | record_type = char_type_node; | |
1002 | ||
1003 | tree sender_decl | |
1004 | = targetm.goacc.create_worker_broadcast_record (record_type, true, | |
2a3f9f65 JB |
1005 | ".oacc_worker_o", |
1006 | placement); | |
e2a58ed6 JB |
1007 | tree receiver_decl |
1008 | = targetm.goacc.create_worker_broadcast_record (record_type, false, | |
2a3f9f65 JB |
1009 | ".oacc_worker_i", |
1010 | placement); | |
e2a58ed6 JB |
1011 | |
1012 | gimple_stmt_iterator gsi = gsi_last_bb (to); | |
1013 | if (EDGE_COUNT (to->succs) > 1) | |
1014 | gsi_prev (&gsi); | |
1015 | edge e = split_block (to, gsi_stmt (gsi)); | |
1016 | basic_block barrier_block = e->dest; | |
1017 | ||
1018 | gimple_stmt_iterator start = gsi_after_labels (from); | |
1019 | ||
1020 | tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START); | |
1021 | ||
1022 | tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); | |
1023 | ||
2a3f9f65 JB |
1024 | gimple *call |
1025 | = gimple_build_call (decl, 1, | |
1026 | POINTER_TYPE_P (TREE_TYPE (sender_decl)) | |
1027 | ? sender_decl : build_fold_addr_expr (sender_decl)); | |
e2a58ed6 JB |
1028 | gimple_call_set_lhs (call, lhs); |
1029 | gsi_insert_before (&start, call, GSI_NEW_STMT); | |
1030 | update_stmt (call); | |
1031 | ||
2a3f9f65 JB |
1032 | /* The shared-memory range for this block overflowed. Add a barrier before |
1033 | the GOACC_single_copy_start call. */ | |
1034 | if (isolate_broadcasts) | |
1035 | { | |
1036 | decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); | |
1037 | gimple *acc_bar = gimple_build_call (decl, 0); | |
1038 | gsi_insert_before (&start, acc_bar, GSI_SAME_STMT); | |
1039 | } | |
1040 | ||
e2a58ed6 JB |
1041 | tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); |
1042 | ||
1043 | gimple *conv = gimple_build_assign (conv_tmp, | |
1044 | fold_convert (TREE_TYPE (receiver_decl), | |
1045 | lhs)); | |
1046 | update_stmt (conv); | |
1047 | gsi_insert_after (&start, conv, GSI_NEW_STMT); | |
1048 | gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp); | |
1049 | gsi_insert_after (&start, asgn, GSI_NEW_STMT); | |
1050 | update_stmt (asgn); | |
1051 | ||
1052 | tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0); | |
1053 | ||
1054 | tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); | |
1055 | asgn = gimple_build_assign (recv_tmp, receiver_decl); | |
1056 | gsi_insert_after (&start, asgn, GSI_NEW_STMT); | |
1057 | update_stmt (asgn); | |
1058 | ||
1059 | gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE, | |
1060 | NULL_TREE); | |
1061 | update_stmt (cond); | |
1062 | ||
1063 | gsi_insert_after (&start, cond, GSI_NEW_STMT); | |
1064 | ||
1065 | edge et = split_block (from, cond); | |
1066 | et->flags &= ~EDGE_FALLTHRU; | |
1067 | et->flags |= EDGE_TRUE_VALUE; | |
1068 | /* Make the active worker the more probable path so we prefer fallthrough | |
1069 | (letting the idle workers jump around more). */ | |
1070 | et->probability = profile_probability::likely (); | |
1071 | ||
1072 | basic_block body = et->dest; | |
1073 | ||
1074 | edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE); | |
1075 | ef->probability = et->probability.invert (); | |
1076 | ||
e2a58ed6 | 1077 | gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block); |
e2a58ed6 | 1078 | cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE); |
2961ac45 JB |
1079 | |
1080 | if (record_type != char_type_node || has_gang_private_write) | |
1081 | { | |
1082 | decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); | |
1083 | gimple *acc_bar = gimple_build_call (decl, 0); | |
1084 | ||
1085 | gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT); | |
1086 | gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT); | |
1087 | } | |
1088 | else | |
1089 | gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT); | |
e2a58ed6 JB |
1090 | |
1091 | edge et2 = split_block (barrier_block, cond); | |
1092 | et2->flags &= ~EDGE_FALLTHRU; | |
1093 | et2->flags |= EDGE_TRUE_VALUE; | |
1094 | et2->probability = profile_probability::unlikely (); | |
1095 | ||
1096 | basic_block exit_block = et2->dest; | |
1097 | ||
1098 | basic_block copyout_block = split_edge (et2); | |
1099 | edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE); | |
1100 | ef2->probability = et2->probability.invert (); | |
1101 | ||
1102 | gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block); | |
1103 | ||
1104 | edge copyout_to_exit = single_succ_edge (copyout_block); | |
1105 | ||
1106 | gimple_seq sender_seq = NULL; | |
1107 | ||
1108 | /* Make sure we iterate over definitions in a stable order. */ | |
1109 | auto_vec<tree> escape_vec (def_escapes_block->elements ()); | |
1110 | for (hash_set<tree>::iterator it = def_escapes_block->begin (); | |
1111 | it != def_escapes_block->end (); ++it) | |
1112 | escape_vec.quick_push (*it); | |
1113 | escape_vec.qsort (sort_by_ssa_version_or_uid); | |
1114 | ||
1115 | for (unsigned i = 0; i < escape_vec.length (); i++) | |
1116 | { | |
1117 | tree var = escape_vec[i]; | |
1118 | ||
1119 | if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var)) | |
1120 | continue; | |
1121 | ||
1122 | tree barrier_def = 0; | |
1123 | ||
1124 | if (TREE_CODE (var) == SSA_NAME) | |
1125 | { | |
1126 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); | |
1127 | ||
1128 | if (gimple_nop_p (def_stmt)) | |
1129 | continue; | |
1130 | ||
1131 | /* The barrier phi takes one result from the actual work of the | |
1132 | block we're neutering, and the other result is constant zero of | |
1133 | the same type. */ | |
1134 | ||
1135 | gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block); | |
1136 | barrier_def = create_new_def_for (var, barrier_phi, | |
1137 | gimple_phi_result_ptr (barrier_phi)); | |
1138 | ||
1139 | add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION); | |
1140 | add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef, | |
1141 | UNKNOWN_LOCATION); | |
1142 | ||
1143 | update_stmt (barrier_phi); | |
1144 | } | |
1145 | else | |
a64c2b0d | 1146 | gcc_assert (VAR_P (var)); |
e2a58ed6 JB |
1147 | |
1148 | /* If we had no record type, we will have no fields map. */ | |
0fe9176f | 1149 | field_map_t *fields = record_field_map->get (record_type); |
e2a58ed6 JB |
1150 | |
1151 | if (worker_partitioned_uses->contains (var) | |
1152 | && fields | |
1153 | && fields->get (var)) | |
1154 | { | |
1155 | tree neutered_def = make_ssa_name (TREE_TYPE (var)); | |
1156 | ||
1157 | /* Receive definition from shared memory block. */ | |
1158 | ||
049eda82 | 1159 | tree receiver_ref = build_receiver_ref (var, receiver_decl, fields); |
e2a58ed6 JB |
1160 | gassign *recv = gimple_build_assign (neutered_def, |
1161 | receiver_ref); | |
1162 | gsi_insert_after (©out_gsi, recv, GSI_CONTINUE_LINKING); | |
1163 | update_stmt (recv); | |
1164 | ||
a64c2b0d | 1165 | if (VAR_P (var)) |
e2a58ed6 JB |
1166 | { |
1167 | /* If it's a VAR_DECL, we only copied to an SSA temporary. Copy | |
1168 | to the final location now. */ | |
1169 | gassign *asgn = gimple_build_assign (var, neutered_def); | |
1170 | gsi_insert_after (©out_gsi, asgn, GSI_CONTINUE_LINKING); | |
1171 | update_stmt (asgn); | |
1172 | } | |
1173 | else | |
1174 | { | |
1175 | /* If it's an SSA name, create a new phi at the join node to | |
1176 | represent either the output from the active worker (the | |
1177 | barrier) or the inactive workers (the copyout block). */ | |
1178 | gphi *join_phi = create_phi_node (NULL_TREE, exit_block); | |
1179 | create_new_def_for (barrier_def, join_phi, | |
1180 | gimple_phi_result_ptr (join_phi)); | |
1181 | add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION); | |
1182 | add_phi_arg (join_phi, neutered_def, copyout_to_exit, | |
1183 | UNKNOWN_LOCATION); | |
1184 | update_stmt (join_phi); | |
1185 | } | |
1186 | ||
1187 | /* Send definition to shared memory block. */ | |
1188 | ||
049eda82 | 1189 | tree sender_ref = build_sender_ref (var, sender_decl, fields); |
e2a58ed6 JB |
1190 | |
1191 | if (TREE_CODE (var) == SSA_NAME) | |
1192 | { | |
1193 | gassign *send = gimple_build_assign (sender_ref, var); | |
1194 | gimple_seq_add_stmt (&sender_seq, send); | |
1195 | update_stmt (send); | |
1196 | } | |
a64c2b0d | 1197 | else if (VAR_P (var)) |
e2a58ed6 JB |
1198 | { |
1199 | tree tmp = make_ssa_name (TREE_TYPE (var)); | |
1200 | gassign *send = gimple_build_assign (tmp, var); | |
1201 | gimple_seq_add_stmt (&sender_seq, send); | |
1202 | update_stmt (send); | |
1203 | send = gimple_build_assign (sender_ref, tmp); | |
1204 | gimple_seq_add_stmt (&sender_seq, send); | |
1205 | update_stmt (send); | |
1206 | } | |
1207 | else | |
1208 | gcc_unreachable (); | |
1209 | } | |
1210 | } | |
1211 | ||
2a3f9f65 JB |
1212 | /* The shared-memory range for this block overflowed. Add a barrier at the |
1213 | end. */ | |
1214 | if (isolate_broadcasts) | |
1215 | { | |
1216 | gsi = gsi_start_bb (exit_block); | |
1217 | decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); | |
1218 | gimple *acc_bar = gimple_build_call (decl, 0); | |
1219 | gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT); | |
1220 | } | |
1221 | ||
e2a58ed6 JB |
1222 | /* It's possible for the ET->DEST block (the work done by the active thread) |
1223 | to finish with a control-flow insn, e.g. a UNIQUE function call. Split | |
1224 | the block and add SENDER_SEQ in the latter part to avoid having control | |
1225 | flow in the middle of a BB. */ | |
1226 | ||
1227 | decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END); | |
2a3f9f65 JB |
1228 | call = gimple_build_call (decl, 1, |
1229 | POINTER_TYPE_P (TREE_TYPE (sender_decl)) | |
1230 | ? sender_decl | |
1231 | : build_fold_addr_expr (sender_decl)); | |
e2a58ed6 JB |
1232 | gimple_seq_add_stmt (&sender_seq, call); |
1233 | ||
1234 | gsi = gsi_last_bb (body); | |
1235 | gimple *last = gsi_stmt (gsi); | |
1236 | basic_block sender_block = split_block (body, last)->dest; | |
1237 | gsi = gsi_last_bb (sender_block); | |
1238 | gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING); | |
1239 | } | |
1240 | ||
2a3f9f65 JB |
1241 | typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> > |
1242 | blk_offset_map_t; | |
1243 | ||
e2a58ed6 JB |
1244 | static void |
1245 | neuter_worker_single (parallel_g *par, unsigned outer_mask, | |
1246 | bitmap worker_single, bitmap vector_single, | |
1247 | vec<propagation_set *> *prop_set, | |
049eda82 | 1248 | hash_set<tree> *partitioned_var_uses, |
2a3f9f65 | 1249 | record_field_map_t *record_field_map, |
2961ac45 JB |
1250 | blk_offset_map_t *blk_offset_map, |
1251 | bitmap writes_gang_private) | |
e2a58ed6 JB |
1252 | { |
1253 | unsigned mask = outer_mask | par->mask; | |
1254 | ||
1255 | if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) | |
1256 | { | |
1257 | basic_block block; | |
1258 | ||
1259 | for (unsigned i = 0; par->blocks.iterate (i, &block); i++) | |
1260 | { | |
1261 | bool has_defs = false; | |
1262 | hash_set<tree> def_escapes_block; | |
1263 | hash_set<tree> worker_partitioned_uses; | |
1264 | unsigned j; | |
1265 | tree var; | |
1266 | ||
1267 | FOR_EACH_SSA_NAME (j, var, cfun) | |
1268 | { | |
1269 | if (SSA_NAME_IS_VIRTUAL_OPERAND (var)) | |
1270 | { | |
1271 | has_defs = true; | |
1272 | continue; | |
1273 | } | |
1274 | ||
1275 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); | |
1276 | ||
1277 | if (gimple_nop_p (def_stmt)) | |
1278 | continue; | |
1279 | ||
1280 | if (gimple_bb (def_stmt)->index != block->index) | |
1281 | continue; | |
1282 | ||
1283 | gimple *use_stmt; | |
1284 | imm_use_iterator use_iter; | |
1285 | bool uses_outside_block = false; | |
1286 | bool worker_partitioned_use = false; | |
1287 | ||
1288 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var) | |
1289 | { | |
1290 | int blocknum = gimple_bb (use_stmt)->index; | |
1291 | ||
1292 | /* Don't propagate SSA names that are only used in the | |
1293 | current block, unless the usage is in a phi node: that | |
1294 | means the name left the block, then came back in at the | |
1295 | top. */ | |
1296 | if (blocknum != block->index | |
1297 | || gimple_code (use_stmt) == GIMPLE_PHI) | |
1298 | uses_outside_block = true; | |
1299 | if (!bitmap_bit_p (worker_single, blocknum)) | |
1300 | worker_partitioned_use = true; | |
1301 | } | |
1302 | ||
1303 | if (uses_outside_block) | |
1304 | def_escapes_block.add (var); | |
1305 | ||
1306 | if (worker_partitioned_use) | |
1307 | { | |
1308 | worker_partitioned_uses.add (var); | |
1309 | has_defs = true; | |
1310 | } | |
1311 | } | |
1312 | ||
1313 | propagation_set *ws_prop = (*prop_set)[block->index]; | |
1314 | ||
1315 | if (ws_prop) | |
1316 | { | |
1317 | for (propagation_set::iterator it = ws_prop->begin (); | |
1318 | it != ws_prop->end (); | |
1319 | ++it) | |
1320 | { | |
1321 | tree var = *it; | |
1322 | if (TREE_CODE (var) == VAR_DECL) | |
1323 | { | |
1324 | def_escapes_block.add (var); | |
1325 | if (partitioned_var_uses->contains (var)) | |
1326 | { | |
1327 | worker_partitioned_uses.add (var); | |
1328 | has_defs = true; | |
1329 | } | |
1330 | } | |
1331 | } | |
1332 | ||
1333 | delete ws_prop; | |
1334 | (*prop_set)[block->index] = 0; | |
1335 | } | |
1336 | ||
2961ac45 JB |
1337 | bool only_marker_fns = true; |
1338 | bool join_block = false; | |
1339 | ||
1340 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
1341 | !gsi_end_p (gsi); | |
1342 | gsi_next (&gsi)) | |
1343 | { | |
1344 | gimple *stmt = gsi_stmt (gsi); | |
1345 | if (gimple_code (stmt) == GIMPLE_CALL | |
1346 | && gimple_call_internal_p (stmt, IFN_UNIQUE)) | |
1347 | { | |
1348 | enum ifn_unique_kind k = ((enum ifn_unique_kind) | |
1349 | TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); | |
1350 | if (k != IFN_UNIQUE_OACC_PRIVATE | |
1351 | && k != IFN_UNIQUE_OACC_JOIN | |
1352 | && k != IFN_UNIQUE_OACC_FORK | |
1353 | && k != IFN_UNIQUE_OACC_HEAD_MARK | |
1354 | && k != IFN_UNIQUE_OACC_TAIL_MARK) | |
1355 | only_marker_fns = false; | |
1356 | else if (k == IFN_UNIQUE_OACC_JOIN) | |
1357 | /* The JOIN marker is special in that it *cannot* be | |
1358 | predicated for worker zero, because it may be lowered | |
1359 | to a barrier instruction and all workers must typically | |
1360 | execute that barrier. We shouldn't be doing any | |
1361 | broadcasts from the join block anyway. */ | |
1362 | join_block = true; | |
1363 | } | |
1364 | else if (gimple_code (stmt) == GIMPLE_CALL | |
1365 | && gimple_call_internal_p (stmt, IFN_GOACC_LOOP)) | |
1366 | /* Empty. */; | |
1367 | else if (gimple_nop_p (stmt)) | |
1368 | /* Empty. */; | |
1369 | else | |
1370 | only_marker_fns = false; | |
1371 | } | |
1372 | ||
1373 | /* We can skip predicating this block for worker zero if the only | |
1374 | thing it contains is marker functions that will be removed in the | |
1375 | oaccdevlow pass anyway. | |
1376 | Don't do this if the block has (any) phi nodes, because those | |
1377 | might define SSA names that need broadcasting. | |
1378 | TODO: We might be able to skip transforming blocks that only | |
1379 | contain some other trivial statements too. */ | |
1380 | if (only_marker_fns && !phi_nodes (block)) | |
1381 | continue; | |
1382 | ||
1383 | gcc_assert (!join_block); | |
e2a58ed6 JB |
1384 | |
1385 | if (has_defs) | |
2a3f9f65 | 1386 | { |
2961ac45 | 1387 | tree record_type = (tree) block->aux; |
2a3f9f65 JB |
1388 | std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc |
1389 | = blk_offset_map->get (block); | |
1390 | gcc_assert (!record_type || off_rngalloc); | |
1391 | unsigned HOST_WIDE_INT offset | |
1392 | = off_rngalloc ? off_rngalloc->first : 0; | |
1393 | bool range_allocated | |
1394 | = off_rngalloc ? off_rngalloc->second : true; | |
2961ac45 JB |
1395 | bool has_gang_private_write |
1396 | = bitmap_bit_p (writes_gang_private, block->index); | |
2a3f9f65 JB |
1397 | worker_single_copy (block, block, &def_escapes_block, |
1398 | &worker_partitioned_uses, record_type, | |
1399 | record_field_map, | |
2961ac45 JB |
1400 | offset, !range_allocated, |
1401 | has_gang_private_write); | |
2a3f9f65 | 1402 | } |
e2a58ed6 JB |
1403 | else |
1404 | worker_single_simple (block, block, &def_escapes_block); | |
1405 | } | |
1406 | } | |
1407 | ||
1408 | if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) | |
1409 | { | |
1410 | basic_block block; | |
1411 | ||
1412 | for (unsigned i = 0; par->blocks.iterate (i, &block); i++) | |
1413 | for (gimple_stmt_iterator gsi = gsi_start_bb (block); | |
1414 | !gsi_end_p (gsi); | |
1415 | gsi_next (&gsi)) | |
1416 | { | |
1417 | gimple *stmt = gsi_stmt (gsi); | |
1418 | ||
1419 | if (gimple_code (stmt) == GIMPLE_CALL | |
1420 | && !gimple_call_internal_p (stmt) | |
1421 | && !omp_sese_active_worker_call (as_a <gcall *> (stmt))) | |
1422 | { | |
1423 | /* If we have an OpenACC routine call in worker-single mode, | |
1424 | place barriers before and afterwards to prevent | |
1425 | clobbering re-used shared memory regions (as are used | |
1426 | for AMDGCN at present, for example). */ | |
1427 | tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); | |
1428 | gsi_insert_before (&gsi, gimple_build_call (decl, 0), | |
1429 | GSI_SAME_STMT); | |
1430 | gsi_insert_after (&gsi, gimple_build_call (decl, 0), | |
1431 | GSI_NEW_STMT); | |
1432 | } | |
1433 | } | |
1434 | } | |
1435 | ||
1436 | if (par->inner) | |
1437 | neuter_worker_single (par->inner, mask, worker_single, vector_single, | |
2a3f9f65 | 1438 | prop_set, partitioned_var_uses, record_field_map, |
2961ac45 | 1439 | blk_offset_map, writes_gang_private); |
e2a58ed6 JB |
1440 | if (par->next) |
1441 | neuter_worker_single (par->next, outer_mask, worker_single, vector_single, | |
2a3f9f65 | 1442 | prop_set, partitioned_var_uses, record_field_map, |
2961ac45 | 1443 | blk_offset_map, writes_gang_private); |
2a3f9f65 JB |
1444 | } |
1445 | ||
1446 | static void | |
1447 | dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable) | |
1448 | { | |
1449 | if (bb->flags & BB_VISITED) | |
1450 | return; | |
1451 | ||
1452 | bb->flags |= BB_VISITED; | |
1453 | ||
1454 | if (bb->succs) | |
1455 | { | |
1456 | edge e; | |
1457 | edge_iterator ei; | |
1458 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1459 | { | |
1460 | basic_block dest = e->dest; | |
1461 | if (dest->aux) | |
1462 | bitmap_set_bit (reachable, dest->index); | |
1463 | else | |
1464 | dfs_broadcast_reachable_1 (dest, reachable); | |
1465 | } | |
1466 | } | |
e2a58ed6 JB |
1467 | } |
1468 | ||
2a3f9f65 JB |
1469 | typedef std::pair<int, tree> idx_decl_pair_t; |
1470 | ||
1471 | typedef auto_vec<splay_tree> used_range_vec_t; | |
1472 | ||
e2a58ed6 | 1473 | static int |
2a3f9f65 JB |
1474 | sort_size_descending (const void *a, const void *b) |
1475 | { | |
1476 | const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a; | |
1477 | const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b; | |
1478 | unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second)); | |
1479 | unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second)); | |
1480 | return bsize - asize; | |
1481 | } | |
1482 | ||
1483 | class addr_range | |
1484 | { | |
1485 | public: | |
1486 | addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi) | |
1487 | : lo (addr_lo), hi (addr_hi) | |
1488 | { } | |
1489 | addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi) | |
1490 | { } | |
1491 | addr_range () : lo (0), hi (0) | |
1492 | { } | |
1493 | ||
1494 | bool invalid () { return lo == 0 && hi == 0; } | |
1495 | ||
1496 | unsigned HOST_WIDE_INT lo; | |
1497 | unsigned HOST_WIDE_INT hi; | |
1498 | }; | |
1499 | ||
1500 | static int | |
1501 | splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b) | |
1502 | { | |
1503 | addr_range *ar = (addr_range *) a; | |
1504 | addr_range *br = (addr_range *) b; | |
1505 | if (ar->lo == br->lo && ar->hi == br->hi) | |
1506 | return 0; | |
1507 | if (ar->hi <= br->lo) | |
1508 | return -1; | |
1509 | else if (ar->lo >= br->hi) | |
1510 | return 1; | |
1511 | return 0; | |
1512 | } | |
1513 | ||
1514 | static void | |
1515 | splay_tree_free_key (splay_tree_key k) | |
1516 | { | |
1517 | addr_range *ar = (addr_range *) k; | |
1518 | delete ar; | |
1519 | } | |
1520 | ||
1521 | static addr_range | |
1522 | first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size, | |
1523 | unsigned HOST_WIDE_INT align, addr_range *bounds) | |
1524 | { | |
1525 | splay_tree_node min = splay_tree_min (s); | |
1526 | if (min) | |
1527 | { | |
1528 | splay_tree_node next; | |
1529 | while ((next = splay_tree_successor (s, min->key))) | |
1530 | { | |
1531 | unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi; | |
1532 | unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo; | |
1533 | unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1); | |
1534 | if (base + size <= hi) | |
1535 | return addr_range (base, base + size); | |
1536 | min = next; | |
1537 | } | |
1538 | ||
1539 | unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi; | |
1540 | base = (base + align - 1) & ~(align - 1); | |
1541 | if (base + size <= bounds->hi) | |
1542 | return addr_range (base, base + size); | |
1543 | else | |
1544 | return addr_range (); | |
1545 | } | |
1546 | else | |
1547 | { | |
1548 | unsigned HOST_WIDE_INT lo = bounds->lo; | |
1549 | lo = (lo + align - 1) & ~(align - 1); | |
1550 | if (lo + size <= bounds->hi) | |
1551 | return addr_range (lo, lo + size); | |
1552 | else | |
1553 | return addr_range (); | |
1554 | } | |
1555 | } | |
1556 | ||
1557 | static int | |
1558 | merge_ranges_1 (splay_tree_node n, void *ptr) | |
1559 | { | |
1560 | splay_tree accum = (splay_tree) ptr; | |
1561 | addr_range ar = *(addr_range *) n->key; | |
1562 | ||
1563 | splay_tree_node old = splay_tree_lookup (accum, n->key); | |
1564 | ||
1565 | /* We might have an overlap. Create a new range covering the | |
1566 | overlapping parts. */ | |
1567 | if (old) | |
1568 | { | |
1569 | addr_range *old_ar = (addr_range *) old->key; | |
1570 | ar.lo = MIN (old_ar->lo, ar.lo); | |
1571 | ar.hi = MAX (old_ar->hi, ar.hi); | |
1572 | splay_tree_remove (accum, old->key); | |
1573 | } | |
1574 | ||
1575 | addr_range *new_ar = new addr_range (ar); | |
1576 | ||
1577 | splay_tree_insert (accum, (splay_tree_key) new_ar, n->value); | |
1578 | ||
1579 | return 0; | |
1580 | } | |
1581 | ||
1582 | static void | |
1583 | merge_ranges (splay_tree accum, splay_tree sp) | |
1584 | { | |
1585 | splay_tree_foreach (sp, merge_ranges_1, (void *) accum); | |
1586 | } | |
1587 | ||
1588 | static void | |
1589 | oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo, | |
1590 | unsigned HOST_WIDE_INT bounds_hi) | |
e2a58ed6 JB |
1591 | { |
1592 | bb_stmt_map_t bb_stmt_map; | |
1593 | auto_bitmap worker_single, vector_single; | |
1594 | ||
1595 | omp_sese_split_blocks (&bb_stmt_map); | |
1596 | ||
1597 | if (dump_file) | |
1598 | { | |
1599 | fprintf (dump_file, "\n\nAfter splitting:\n\n"); | |
1600 | dump_function_to_file (current_function_decl, dump_file, dump_flags); | |
1601 | } | |
1602 | ||
1603 | unsigned mask = 0; | |
1604 | ||
1605 | /* If this is a routine, calculate MASK as if the outer levels are already | |
1606 | partitioned. */ | |
82792cc4 JB |
1607 | { |
1608 | tree attr = oacc_get_fn_attrib (current_function_decl); | |
1609 | tree dims = TREE_VALUE (attr); | |
1610 | unsigned ix; | |
1611 | for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims)) | |
1612 | { | |
1613 | tree allowed = TREE_PURPOSE (dims); | |
1614 | if (allowed && integer_zerop (allowed)) | |
1615 | mask |= GOMP_DIM_MASK (ix); | |
1616 | } | |
1617 | } | |
e2a58ed6 JB |
1618 | |
1619 | parallel_g *par = omp_sese_discover_pars (&bb_stmt_map); | |
1620 | populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0); | |
1621 | ||
1622 | basic_block bb; | |
1623 | FOR_ALL_BB_FN (bb, cfun) | |
1624 | bb->aux = NULL; | |
1625 | ||
7b9d99e6 TS |
1626 | vec<propagation_set *> prop_set (vNULL); |
1627 | prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true); | |
e2a58ed6 JB |
1628 | |
1629 | find_ssa_names_to_propagate (par, mask, worker_single, vector_single, | |
1630 | &prop_set); | |
1631 | ||
1632 | hash_set<tree> partitioned_var_uses; | |
1633 | hash_set<tree> gang_private_vars; | |
2961ac45 | 1634 | auto_bitmap writes_gang_private; |
e2a58ed6 JB |
1635 | |
1636 | find_gang_private_vars (&gang_private_vars); | |
1637 | find_partitioned_var_uses (par, mask, &partitioned_var_uses); | |
1638 | find_local_vars_to_propagate (par, mask, &partitioned_var_uses, | |
2961ac45 JB |
1639 | &gang_private_vars, writes_gang_private, |
1640 | &prop_set); | |
e2a58ed6 | 1641 | |
049eda82 TS |
1642 | record_field_map_t record_field_map; |
1643 | ||
e2a58ed6 JB |
1644 | FOR_ALL_BB_FN (bb, cfun) |
1645 | { | |
1646 | propagation_set *ws_prop = prop_set[bb->index]; | |
1647 | if (ws_prop) | |
1648 | { | |
1649 | tree record_type = lang_hooks.types.make_type (RECORD_TYPE); | |
1650 | tree name = create_tmp_var_name (".oacc_ws_data_s"); | |
1651 | name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type); | |
1652 | DECL_ARTIFICIAL (name) = 1; | |
1653 | DECL_NAMELESS (name) = 1; | |
1654 | TYPE_NAME (record_type) = name; | |
1655 | TYPE_ARTIFICIAL (record_type) = 1; | |
1656 | ||
1657 | auto_vec<tree> field_vec (ws_prop->elements ()); | |
1658 | for (hash_set<tree>::iterator it = ws_prop->begin (); | |
1659 | it != ws_prop->end (); ++it) | |
1660 | field_vec.quick_push (*it); | |
1661 | ||
1662 | field_vec.qsort (sort_by_size_then_ssa_version_or_uid); | |
1663 | ||
049eda82 | 1664 | bool existed; |
0fe9176f TS |
1665 | field_map_t *fields |
1666 | = &record_field_map.get_or_insert (record_type, &existed); | |
049eda82 | 1667 | gcc_checking_assert (!existed); |
e2a58ed6 JB |
1668 | |
1669 | /* Insert var fields in reverse order, so the last inserted element | |
1670 | is the first in the structure. */ | |
1671 | for (int i = field_vec.length () - 1; i >= 0; i--) | |
049eda82 | 1672 | install_var_field (field_vec[i], record_type, fields); |
e2a58ed6 JB |
1673 | |
1674 | layout_type (record_type); | |
1675 | ||
1676 | bb->aux = (tree) record_type; | |
1677 | } | |
1678 | } | |
1679 | ||
2a3f9f65 JB |
1680 | sbitmap *reachable |
1681 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
1682 | last_basic_block_for_fn (cfun)); | |
1683 | ||
1684 | bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun)); | |
1685 | ||
1686 | auto_vec<std::pair<int, tree> > priority; | |
1687 | ||
1688 | FOR_ALL_BB_FN (bb, cfun) | |
1689 | { | |
1690 | if (bb->aux) | |
1691 | { | |
1692 | tree record_type = (tree) bb->aux; | |
1693 | ||
1694 | basic_block bb2; | |
1695 | FOR_ALL_BB_FN (bb2, cfun) | |
1696 | bb2->flags &= ~BB_VISITED; | |
1697 | ||
1698 | priority.safe_push (std::make_pair (bb->index, record_type)); | |
1699 | dfs_broadcast_reachable_1 (bb, reachable[bb->index]); | |
1700 | } | |
1701 | } | |
1702 | ||
1703 | sbitmap *inverted | |
1704 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
1705 | last_basic_block_for_fn (cfun)); | |
1706 | ||
1707 | bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun)); | |
1708 | ||
1709 | for (int i = 0; i < last_basic_block_for_fn (cfun); i++) | |
1710 | { | |
1711 | sbitmap_iterator bi; | |
1712 | unsigned int j; | |
1713 | EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi) | |
1714 | bitmap_set_bit (inverted[j], i); | |
1715 | } | |
1716 | ||
1717 | for (int i = 0; i < last_basic_block_for_fn (cfun); i++) | |
1718 | bitmap_ior (reachable[i], reachable[i], inverted[i]); | |
1719 | ||
1720 | sbitmap_vector_free (inverted); | |
1721 | ||
1722 | used_range_vec_t used_ranges; | |
1723 | ||
1724 | used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
1725 | ||
1726 | blk_offset_map_t blk_offset_map; | |
1727 | ||
1728 | addr_range worker_shm_bounds (bounds_lo, bounds_hi); | |
1729 | ||
1730 | priority.qsort (sort_size_descending); | |
1731 | for (unsigned int i = 0; i < priority.length (); i++) | |
1732 | { | |
1733 | idx_decl_pair_t p = priority[i]; | |
1734 | int blkno = p.first; | |
1735 | tree record_type = p.second; | |
1736 | HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type)); | |
1737 | HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type); | |
1738 | ||
1739 | splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range, | |
1740 | splay_tree_free_key, NULL); | |
1741 | ||
1742 | if (!used_ranges[blkno]) | |
1743 | used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range, | |
1744 | splay_tree_free_key, NULL); | |
1745 | else | |
1746 | merge_ranges (conflicts, used_ranges[blkno]); | |
1747 | ||
1748 | sbitmap_iterator bi; | |
1749 | unsigned int j; | |
1750 | EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi) | |
1751 | if (used_ranges[j]) | |
1752 | merge_ranges (conflicts, used_ranges[j]); | |
1753 | ||
1754 | addr_range ar | |
1755 | = first_fit_range (conflicts, size, align, &worker_shm_bounds); | |
1756 | ||
1757 | splay_tree_delete (conflicts); | |
1758 | ||
1759 | if (ar.invalid ()) | |
1760 | { | |
e87789f1 TS |
1761 | unsigned HOST_WIDE_INT base |
1762 | = (bounds_lo + align - 1) & ~(align - 1); | |
2a3f9f65 JB |
1763 | if (base + size > bounds_hi) |
1764 | error_at (UNKNOWN_LOCATION, "shared-memory region overflow"); | |
1765 | std::pair<unsigned HOST_WIDE_INT, bool> base_inrng | |
1766 | = std::make_pair (base, false); | |
1767 | blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng); | |
1768 | } | |
1769 | else | |
1770 | { | |
1771 | splay_tree_node old = splay_tree_lookup (used_ranges[blkno], | |
1772 | (splay_tree_key) &ar); | |
1773 | if (old) | |
1774 | { | |
1775 | fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is " | |
1776 | "already mapped in block %d\n", (int) ar.lo, | |
1777 | (int) ar.hi, (int) ((addr_range *) old->key)->lo, | |
1778 | (int) ((addr_range *) old->key)->hi, blkno); | |
1779 | abort (); | |
1780 | } | |
1781 | ||
1782 | addr_range *arp = new addr_range (ar); | |
1783 | splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp, | |
1784 | (splay_tree_value) blkno); | |
1785 | std::pair<unsigned HOST_WIDE_INT, bool> base_inrng | |
1786 | = std::make_pair (ar.lo, true); | |
1787 | blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng); | |
1788 | } | |
1789 | } | |
1790 | ||
1791 | sbitmap_vector_free (reachable); | |
1792 | ||
e2a58ed6 | 1793 | neuter_worker_single (par, mask, worker_single, vector_single, &prop_set, |
2a3f9f65 | 1794 | &partitioned_var_uses, &record_field_map, |
2961ac45 | 1795 | &blk_offset_map, writes_gang_private); |
049eda82 | 1796 | |
049eda82 | 1797 | record_field_map.empty (); |
e2a58ed6 | 1798 | |
7b9d99e6 TS |
1799 | /* These are supposed to have been 'delete'd by 'neuter_worker_single'. */ |
1800 | for (auto it : prop_set) | |
1801 | gcc_checking_assert (!it); | |
e2a58ed6 JB |
1802 | prop_set.release (); |
1803 | ||
df98015f TS |
1804 | delete par; |
1805 | ||
e2a58ed6 JB |
1806 | /* This doesn't seem to make a difference. */ |
1807 | loops_state_clear (LOOP_CLOSED_SSA); | |
1808 | ||
1809 | /* Neutering worker-single neutered blocks will invalidate dominance info. | |
1810 | It may be possible to incrementally update just the affected blocks, but | |
1811 | obliterate everything for now. */ | |
1812 | free_dominance_info (CDI_DOMINATORS); | |
1813 | free_dominance_info (CDI_POST_DOMINATORS); | |
1814 | ||
1815 | if (dump_file) | |
1816 | { | |
1817 | fprintf (dump_file, "\n\nAfter neutering:\n\n"); | |
1818 | dump_function_to_file (current_function_decl, dump_file, dump_flags); | |
1819 | } | |
2a3f9f65 JB |
1820 | } |
1821 | ||
1822 | static int | |
1823 | execute_omp_oacc_neuter_broadcast () | |
1824 | { | |
1825 | unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX]; | |
1826 | unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX]; | |
1827 | ||
1828 | for (unsigned i = 0; i < GOMP_DIM_MAX; i++) | |
1829 | { | |
1830 | reduction_size[i] = 0; | |
1831 | private_size[i] = 0; | |
1832 | } | |
1833 | ||
1834 | /* Calculate shared memory size required for reduction variables and | |
1835 | gang-private memory for this offloaded function. */ | |
1836 | basic_block bb; | |
1837 | FOR_ALL_BB_FN (bb, cfun) | |
1838 | { | |
1839 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
1840 | !gsi_end_p (gsi); | |
1841 | gsi_next (&gsi)) | |
1842 | { | |
1843 | gimple *stmt = gsi_stmt (gsi); | |
1844 | if (!is_gimple_call (stmt)) | |
1845 | continue; | |
1846 | gcall *call = as_a <gcall *> (stmt); | |
1847 | if (!gimple_call_internal_p (call)) | |
1848 | continue; | |
1849 | enum internal_fn ifn_code = gimple_call_internal_fn (call); | |
1850 | switch (ifn_code) | |
1851 | { | |
1852 | default: break; | |
1853 | case IFN_GOACC_REDUCTION: | |
1854 | if (integer_minus_onep (gimple_call_arg (call, 3))) | |
1855 | continue; | |
1856 | else | |
1857 | { | |
1858 | unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0)); | |
1859 | /* Only count reduction variables once: the choice to pick | |
1860 | the setup call is fairly arbitrary. */ | |
1861 | if (code == IFN_GOACC_REDUCTION_SETUP) | |
1862 | { | |
1863 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
1864 | tree var = gimple_call_arg (call, 2); | |
1865 | tree offset = gimple_call_arg (call, 5); | |
1866 | tree var_type = TREE_TYPE (var); | |
1867 | unsigned HOST_WIDE_INT limit | |
1868 | = (tree_to_uhwi (offset) | |
1869 | + tree_to_uhwi (TYPE_SIZE_UNIT (var_type))); | |
1870 | reduction_size[level] | |
1871 | = MAX (reduction_size[level], limit); | |
1872 | } | |
1873 | } | |
1874 | break; | |
1875 | case IFN_UNIQUE: | |
1876 | { | |
1877 | enum ifn_unique_kind kind | |
1878 | = ((enum ifn_unique_kind) | |
1879 | TREE_INT_CST_LOW (gimple_call_arg (call, 0))); | |
1880 | ||
1881 | if (kind == IFN_UNIQUE_OACC_PRIVATE) | |
1882 | { | |
1883 | HOST_WIDE_INT level | |
1884 | = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); | |
1885 | if (level == -1) | |
1886 | break; | |
1887 | for (unsigned i = 3; | |
1888 | i < gimple_call_num_args (call); | |
1889 | i++) | |
1890 | { | |
1891 | tree arg = gimple_call_arg (call, i); | |
1892 | gcc_assert (TREE_CODE (arg) == ADDR_EXPR); | |
1893 | tree decl = TREE_OPERAND (arg, 0); | |
1894 | unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl); | |
1895 | private_size[level] = ((private_size[level] + align - 1) | |
1896 | & ~(align - 1)); | |
1897 | unsigned HOST_WIDE_INT decl_size | |
1898 | = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl))); | |
1899 | private_size[level] += decl_size; | |
1900 | } | |
1901 | } | |
1902 | } | |
1903 | break; | |
1904 | } | |
1905 | } | |
1906 | } | |
1907 | ||
1908 | int dims[GOMP_DIM_MAX]; | |
1909 | for (unsigned i = 0; i < GOMP_DIM_MAX; i++) | |
1910 | dims[i] = oacc_get_fn_dim_size (current_function_decl, i); | |
1911 | ||
1912 | /* Find bounds of shared-memory buffer space we can use. */ | |
1913 | unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0; | |
1914 | if (targetm.goacc.shared_mem_layout) | |
1915 | targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims, | |
1916 | private_size, reduction_size); | |
1917 | ||
1918 | /* Perform worker partitioning unless we know 'num_workers(1)'. */ | |
1919 | if (dims[GOMP_DIM_WORKER] != 1) | |
1920 | oacc_do_neutering (bounds_lo, bounds_hi); | |
e2a58ed6 JB |
1921 | |
1922 | return 0; | |
1923 | } | |
1924 | ||
1925 | namespace { | |
1926 | ||
1927 | const pass_data pass_data_omp_oacc_neuter_broadcast = | |
1928 | { | |
1929 | GIMPLE_PASS, /* type */ | |
1930 | "omp_oacc_neuter_broadcast", /* name */ | |
1931 | OPTGROUP_OMP, /* optinfo_flags */ | |
1932 | TV_NONE, /* tv_id */ | |
1933 | PROP_cfg, /* properties_required */ | |
1934 | 0, /* properties_provided */ | |
1935 | 0, /* properties_destroyed */ | |
1936 | 0, /* todo_flags_start */ | |
1937 | TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ | |
1938 | }; | |
1939 | ||
1940 | class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass | |
1941 | { | |
1942 | public: | |
1943 | pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) | |
1944 | : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt) | |
1945 | {} | |
1946 | ||
1947 | /* opt_pass methods: */ | |
725793af | 1948 | bool gate (function *fun) final override |
e2a58ed6 | 1949 | { |
82792cc4 JB |
1950 | if (!flag_openacc) |
1951 | return false; | |
1952 | ||
1953 | if (!targetm.goacc.create_worker_broadcast_record) | |
1954 | return false; | |
1955 | ||
1956 | /* Only relevant for OpenACC offloaded functions. */ | |
1957 | tree attr = oacc_get_fn_attrib (fun->decl); | |
1958 | if (!attr) | |
1959 | return false; | |
1960 | ||
82792cc4 JB |
1961 | return true; |
1962 | } | |
e2a58ed6 | 1963 | |
725793af | 1964 | unsigned int execute (function *) final override |
e2a58ed6 JB |
1965 | { |
1966 | return execute_omp_oacc_neuter_broadcast (); | |
1967 | } | |
1968 | ||
1969 | }; // class pass_omp_oacc_neuter_broadcast | |
1970 | ||
1971 | } // anon namespace | |
1972 | ||
1973 | gimple_opt_pass * | |
1974 | make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) | |
1975 | { | |
1976 | return new pass_omp_oacc_neuter_broadcast (ctxt); | |
1977 | } |