]>
Commit | Line | Data |
---|---|---|
4954efd4 | 1 | /* General types and functions that are uselful for processing of OpenMP, |
2 | OpenACC and similar directivers at various stages of compilation. | |
3 | ||
fbd26352 | 4 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
4954efd4 | 5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* Find an OMP clause of type KIND within CLAUSES. */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "system.h" | |
26 | #include "coretypes.h" | |
27 | #include "backend.h" | |
28 | #include "target.h" | |
29 | #include "tree.h" | |
30 | #include "gimple.h" | |
31 | #include "ssa.h" | |
32 | #include "diagnostic-core.h" | |
33 | #include "fold-const.h" | |
34 | #include "langhooks.h" | |
35 | #include "omp-general.h" | |
30a86690 | 36 | #include "stringpool.h" |
37 | #include "attribs.h" | |
4954efd4 | 38 | |
7e5a76c8 | 39 | enum omp_requires omp_requires_mask; |
40 | ||
4954efd4 | 41 | tree |
42 | omp_find_clause (tree clauses, enum omp_clause_code kind) | |
43 | { | |
44 | for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) | |
45 | if (OMP_CLAUSE_CODE (clauses) == kind) | |
46 | return clauses; | |
47 | ||
48 | return NULL_TREE; | |
49 | } | |
50 | ||
51 | /* Return true if DECL is a reference type. */ | |
52 | ||
53 | bool | |
54 | omp_is_reference (tree decl) | |
55 | { | |
56 | return lang_hooks.decls.omp_privatize_by_reference (decl); | |
57 | } | |
58 | ||
4226cb1d | 59 | /* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR, |
60 | given that V is the loop index variable and STEP is loop step. */ | |
4954efd4 | 61 | |
62 | void | |
4226cb1d | 63 | omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2, |
64 | tree v, tree step) | |
4954efd4 | 65 | { |
66 | switch (*cond_code) | |
67 | { | |
68 | case LT_EXPR: | |
69 | case GT_EXPR: | |
4226cb1d | 70 | break; |
71 | ||
4954efd4 | 72 | case NE_EXPR: |
4226cb1d | 73 | gcc_assert (TREE_CODE (step) == INTEGER_CST); |
74 | if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE) | |
75 | { | |
76 | if (integer_onep (step)) | |
77 | *cond_code = LT_EXPR; | |
78 | else | |
79 | { | |
80 | gcc_assert (integer_minus_onep (step)); | |
81 | *cond_code = GT_EXPR; | |
82 | } | |
83 | } | |
84 | else | |
85 | { | |
86 | tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v))); | |
87 | gcc_assert (TREE_CODE (unit) == INTEGER_CST); | |
88 | if (tree_int_cst_equal (unit, step)) | |
89 | *cond_code = LT_EXPR; | |
90 | else | |
91 | { | |
92 | gcc_assert (wi::neg (wi::to_widest (unit)) | |
93 | == wi::to_widest (step)); | |
94 | *cond_code = GT_EXPR; | |
95 | } | |
96 | } | |
97 | ||
4954efd4 | 98 | break; |
4226cb1d | 99 | |
4954efd4 | 100 | case LE_EXPR: |
101 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) | |
102 | *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); | |
103 | else | |
104 | *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, | |
105 | build_int_cst (TREE_TYPE (*n2), 1)); | |
106 | *cond_code = LT_EXPR; | |
107 | break; | |
108 | case GE_EXPR: | |
109 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) | |
110 | *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); | |
111 | else | |
112 | *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, | |
113 | build_int_cst (TREE_TYPE (*n2), 1)); | |
114 | *cond_code = GT_EXPR; | |
115 | break; | |
116 | default: | |
117 | gcc_unreachable (); | |
118 | } | |
119 | } | |
120 | ||
121 | /* Return the looping step from INCR, extracted from the step of a gimple omp | |
122 | for statement. */ | |
123 | ||
124 | tree | |
125 | omp_get_for_step_from_incr (location_t loc, tree incr) | |
126 | { | |
127 | tree step; | |
128 | switch (TREE_CODE (incr)) | |
129 | { | |
130 | case PLUS_EXPR: | |
131 | step = TREE_OPERAND (incr, 1); | |
132 | break; | |
133 | case POINTER_PLUS_EXPR: | |
134 | step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); | |
135 | break; | |
136 | case MINUS_EXPR: | |
137 | step = TREE_OPERAND (incr, 1); | |
138 | step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); | |
139 | break; | |
140 | default: | |
141 | gcc_unreachable (); | |
142 | } | |
143 | return step; | |
144 | } | |
145 | ||
146 | /* Extract the header elements of parallel loop FOR_STMT and store | |
147 | them into *FD. */ | |
148 | ||
149 | void | |
150 | omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, | |
151 | struct omp_for_data_loop *loops) | |
152 | { | |
153 | tree t, var, *collapse_iter, *collapse_count; | |
154 | tree count = NULL_TREE, iter_type = long_integer_type_node; | |
155 | struct omp_for_data_loop *loop; | |
156 | int i; | |
157 | struct omp_for_data_loop dummy_loop; | |
158 | location_t loc = gimple_location (for_stmt); | |
0076df39 | 159 | bool simd = gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_SIMD; |
4954efd4 | 160 | bool distribute = gimple_omp_for_kind (for_stmt) |
161 | == GF_OMP_FOR_KIND_DISTRIBUTE; | |
162 | bool taskloop = gimple_omp_for_kind (for_stmt) | |
163 | == GF_OMP_FOR_KIND_TASKLOOP; | |
164 | tree iterv, countv; | |
165 | ||
166 | fd->for_stmt = for_stmt; | |
167 | fd->pre = NULL; | |
4954efd4 | 168 | fd->have_nowait = distribute || simd; |
169 | fd->have_ordered = false; | |
7e5a76c8 | 170 | fd->have_reductemp = false; |
48152aa2 | 171 | fd->have_pointer_condtemp = false; |
7d26f131 | 172 | fd->have_scantemp = false; |
173 | fd->have_nonctrl_scantemp = false; | |
9a1d892b | 174 | fd->lastprivate_conditional = 0; |
719a7570 | 175 | fd->tiling = NULL_TREE; |
4954efd4 | 176 | fd->collapse = 1; |
177 | fd->ordered = 0; | |
178 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; | |
179 | fd->sched_modifiers = 0; | |
180 | fd->chunk_size = NULL_TREE; | |
181 | fd->simd_schedule = false; | |
4954efd4 | 182 | collapse_iter = NULL; |
183 | collapse_count = NULL; | |
184 | ||
185 | for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) | |
186 | switch (OMP_CLAUSE_CODE (t)) | |
187 | { | |
188 | case OMP_CLAUSE_NOWAIT: | |
189 | fd->have_nowait = true; | |
190 | break; | |
191 | case OMP_CLAUSE_ORDERED: | |
192 | fd->have_ordered = true; | |
193 | if (OMP_CLAUSE_ORDERED_EXPR (t)) | |
194 | fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); | |
195 | break; | |
196 | case OMP_CLAUSE_SCHEDULE: | |
197 | gcc_assert (!distribute && !taskloop); | |
198 | fd->sched_kind | |
199 | = (enum omp_clause_schedule_kind) | |
200 | (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); | |
201 | fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) | |
202 | & ~OMP_CLAUSE_SCHEDULE_MASK); | |
203 | fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); | |
204 | fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); | |
205 | break; | |
206 | case OMP_CLAUSE_DIST_SCHEDULE: | |
207 | gcc_assert (distribute); | |
208 | fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t); | |
209 | break; | |
210 | case OMP_CLAUSE_COLLAPSE: | |
211 | fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); | |
212 | if (fd->collapse > 1) | |
213 | { | |
214 | collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); | |
215 | collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); | |
216 | } | |
217 | break; | |
719a7570 | 218 | case OMP_CLAUSE_TILE: |
219 | fd->tiling = OMP_CLAUSE_TILE_LIST (t); | |
220 | fd->collapse = list_length (fd->tiling); | |
221 | gcc_assert (fd->collapse); | |
222 | collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t); | |
223 | collapse_count = &OMP_CLAUSE_TILE_COUNT (t); | |
224 | break; | |
7e5a76c8 | 225 | case OMP_CLAUSE__REDUCTEMP_: |
226 | fd->have_reductemp = true; | |
9a1d892b | 227 | break; |
228 | case OMP_CLAUSE_LASTPRIVATE: | |
229 | if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (t)) | |
230 | fd->lastprivate_conditional++; | |
231 | break; | |
48152aa2 | 232 | case OMP_CLAUSE__CONDTEMP_: |
233 | if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) | |
234 | fd->have_pointer_condtemp = true; | |
235 | break; | |
7d26f131 | 236 | case OMP_CLAUSE__SCANTEMP_: |
237 | fd->have_scantemp = true; | |
238 | if (!OMP_CLAUSE__SCANTEMP__ALLOC (t) | |
239 | && !OMP_CLAUSE__SCANTEMP__CONTROL (t)) | |
240 | fd->have_nonctrl_scantemp = true; | |
241 | break; | |
4954efd4 | 242 | default: |
243 | break; | |
244 | } | |
719a7570 | 245 | |
246 | if (fd->collapse > 1 || fd->tiling) | |
247 | fd->loops = loops; | |
248 | else | |
249 | fd->loops = &fd->loop; | |
250 | ||
4954efd4 | 251 | if (fd->ordered && fd->collapse == 1 && loops != NULL) |
252 | { | |
253 | fd->loops = loops; | |
254 | iterv = NULL_TREE; | |
255 | countv = NULL_TREE; | |
256 | collapse_iter = &iterv; | |
257 | collapse_count = &countv; | |
258 | } | |
259 | ||
260 | /* FIXME: for now map schedule(auto) to schedule(static). | |
261 | There should be analysis to determine whether all iterations | |
262 | are approximately the same amount of work (then schedule(static) | |
263 | is best) or if it varies (then schedule(dynamic,N) is better). */ | |
264 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) | |
265 | { | |
266 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; | |
267 | gcc_assert (fd->chunk_size == NULL); | |
268 | } | |
719a7570 | 269 | gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL); |
4954efd4 | 270 | if (taskloop) |
271 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; | |
272 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) | |
273 | gcc_assert (fd->chunk_size == NULL); | |
274 | else if (fd->chunk_size == NULL) | |
275 | { | |
276 | /* We only need to compute a default chunk size for ordered | |
277 | static loops and dynamic loops. */ | |
278 | if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC | |
279 | || fd->have_ordered) | |
280 | fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) | |
281 | ? integer_zero_node : integer_one_node; | |
282 | } | |
283 | ||
284 | int cnt = fd->ordered ? fd->ordered : fd->collapse; | |
285 | for (i = 0; i < cnt; i++) | |
286 | { | |
719a7570 | 287 | if (i == 0 |
288 | && fd->collapse == 1 | |
289 | && !fd->tiling | |
290 | && (fd->ordered == 0 || loops == NULL)) | |
4954efd4 | 291 | loop = &fd->loop; |
292 | else if (loops != NULL) | |
293 | loop = loops + i; | |
294 | else | |
295 | loop = &dummy_loop; | |
296 | ||
297 | loop->v = gimple_omp_for_index (for_stmt, i); | |
298 | gcc_assert (SSA_VAR_P (loop->v)); | |
299 | gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE | |
300 | || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); | |
301 | var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v; | |
302 | loop->n1 = gimple_omp_for_initial (for_stmt, i); | |
303 | ||
304 | loop->cond_code = gimple_omp_for_cond (for_stmt, i); | |
305 | loop->n2 = gimple_omp_for_final (for_stmt, i); | |
7e5a76c8 | 306 | gcc_assert (loop->cond_code != NE_EXPR |
307 | || (gimple_omp_for_kind (for_stmt) | |
308 | != GF_OMP_FOR_KIND_OACC_LOOP)); | |
4954efd4 | 309 | |
310 | t = gimple_omp_for_incr (for_stmt, i); | |
311 | gcc_assert (TREE_OPERAND (t, 0) == var); | |
312 | loop->step = omp_get_for_step_from_incr (loc, t); | |
313 | ||
4226cb1d | 314 | omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2, loop->v, |
315 | loop->step); | |
7e5a76c8 | 316 | |
4954efd4 | 317 | if (simd |
318 | || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC | |
319 | && !fd->have_ordered)) | |
320 | { | |
719a7570 | 321 | if (fd->collapse == 1 && !fd->tiling) |
4954efd4 | 322 | iter_type = TREE_TYPE (loop->v); |
323 | else if (i == 0 | |
324 | || TYPE_PRECISION (iter_type) | |
325 | < TYPE_PRECISION (TREE_TYPE (loop->v))) | |
326 | iter_type | |
327 | = build_nonstandard_integer_type | |
328 | (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); | |
329 | } | |
330 | else if (iter_type != long_long_unsigned_type_node) | |
331 | { | |
332 | if (POINTER_TYPE_P (TREE_TYPE (loop->v))) | |
333 | iter_type = long_long_unsigned_type_node; | |
334 | else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) | |
335 | && TYPE_PRECISION (TREE_TYPE (loop->v)) | |
336 | >= TYPE_PRECISION (iter_type)) | |
337 | { | |
338 | tree n; | |
339 | ||
340 | if (loop->cond_code == LT_EXPR) | |
7e5a76c8 | 341 | n = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
342 | loop->n2, loop->step); | |
4954efd4 | 343 | else |
344 | n = loop->n1; | |
345 | if (TREE_CODE (n) != INTEGER_CST | |
346 | || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) | |
347 | iter_type = long_long_unsigned_type_node; | |
348 | } | |
349 | else if (TYPE_PRECISION (TREE_TYPE (loop->v)) | |
350 | > TYPE_PRECISION (iter_type)) | |
351 | { | |
352 | tree n1, n2; | |
353 | ||
354 | if (loop->cond_code == LT_EXPR) | |
355 | { | |
356 | n1 = loop->n1; | |
7e5a76c8 | 357 | n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
358 | loop->n2, loop->step); | |
4954efd4 | 359 | } |
360 | else | |
361 | { | |
7e5a76c8 | 362 | n1 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (loop->v), |
363 | loop->n2, loop->step); | |
4954efd4 | 364 | n2 = loop->n1; |
365 | } | |
366 | if (TREE_CODE (n1) != INTEGER_CST | |
367 | || TREE_CODE (n2) != INTEGER_CST | |
368 | || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) | |
369 | || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type))) | |
370 | iter_type = long_long_unsigned_type_node; | |
371 | } | |
372 | } | |
373 | ||
374 | if (i >= fd->collapse) | |
375 | continue; | |
376 | ||
377 | if (collapse_count && *collapse_count == NULL) | |
378 | { | |
379 | t = fold_binary (loop->cond_code, boolean_type_node, | |
380 | fold_convert (TREE_TYPE (loop->v), loop->n1), | |
381 | fold_convert (TREE_TYPE (loop->v), loop->n2)); | |
382 | if (t && integer_zerop (t)) | |
383 | count = build_zero_cst (long_long_unsigned_type_node); | |
384 | else if ((i == 0 || count != NULL_TREE) | |
385 | && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE | |
386 | && TREE_CONSTANT (loop->n1) | |
387 | && TREE_CONSTANT (loop->n2) | |
388 | && TREE_CODE (loop->step) == INTEGER_CST) | |
389 | { | |
390 | tree itype = TREE_TYPE (loop->v); | |
391 | ||
392 | if (POINTER_TYPE_P (itype)) | |
393 | itype = signed_type_for (itype); | |
394 | t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); | |
7e5a76c8 | 395 | t = fold_build2_loc (loc, PLUS_EXPR, itype, |
396 | fold_convert_loc (loc, itype, loop->step), | |
397 | t); | |
4954efd4 | 398 | t = fold_build2_loc (loc, PLUS_EXPR, itype, t, |
7e5a76c8 | 399 | fold_convert_loc (loc, itype, loop->n2)); |
4954efd4 | 400 | t = fold_build2_loc (loc, MINUS_EXPR, itype, t, |
7e5a76c8 | 401 | fold_convert_loc (loc, itype, loop->n1)); |
4954efd4 | 402 | if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) |
7e5a76c8 | 403 | { |
404 | tree step = fold_convert_loc (loc, itype, loop->step); | |
405 | t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, | |
406 | fold_build1_loc (loc, NEGATE_EXPR, | |
407 | itype, t), | |
408 | fold_build1_loc (loc, NEGATE_EXPR, | |
409 | itype, step)); | |
410 | } | |
4954efd4 | 411 | else |
412 | t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, | |
7e5a76c8 | 413 | fold_convert_loc (loc, itype, |
414 | loop->step)); | |
4954efd4 | 415 | t = fold_convert_loc (loc, long_long_unsigned_type_node, t); |
416 | if (count != NULL_TREE) | |
7e5a76c8 | 417 | count = fold_build2_loc (loc, MULT_EXPR, |
418 | long_long_unsigned_type_node, | |
419 | count, t); | |
4954efd4 | 420 | else |
421 | count = t; | |
422 | if (TREE_CODE (count) != INTEGER_CST) | |
423 | count = NULL_TREE; | |
424 | } | |
425 | else if (count && !integer_zerop (count)) | |
426 | count = NULL_TREE; | |
427 | } | |
428 | } | |
429 | ||
430 | if (count | |
431 | && !simd | |
432 | && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC | |
433 | || fd->have_ordered)) | |
434 | { | |
435 | if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) | |
436 | iter_type = long_long_unsigned_type_node; | |
437 | else | |
438 | iter_type = long_integer_type_node; | |
439 | } | |
440 | else if (collapse_iter && *collapse_iter != NULL) | |
441 | iter_type = TREE_TYPE (*collapse_iter); | |
442 | fd->iter_type = iter_type; | |
443 | if (collapse_iter && *collapse_iter == NULL) | |
444 | *collapse_iter = create_tmp_var (iter_type, ".iter"); | |
445 | if (collapse_count && *collapse_count == NULL) | |
446 | { | |
447 | if (count) | |
448 | *collapse_count = fold_convert_loc (loc, iter_type, count); | |
449 | else | |
450 | *collapse_count = create_tmp_var (iter_type, ".count"); | |
451 | } | |
452 | ||
719a7570 | 453 | if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops)) |
4954efd4 | 454 | { |
455 | fd->loop.v = *collapse_iter; | |
456 | fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); | |
457 | fd->loop.n2 = *collapse_count; | |
458 | fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); | |
459 | fd->loop.cond_code = LT_EXPR; | |
460 | } | |
461 | else if (loops) | |
462 | loops[0] = fd->loop; | |
463 | } | |
464 | ||
465 | /* Build a call to GOMP_barrier. */ | |
466 | ||
467 | gimple * | |
468 | omp_build_barrier (tree lhs) | |
469 | { | |
470 | tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL | |
471 | : BUILT_IN_GOMP_BARRIER); | |
472 | gcall *g = gimple_build_call (fndecl, 0); | |
473 | if (lhs) | |
474 | gimple_call_set_lhs (g, lhs); | |
475 | return g; | |
476 | } | |
477 | ||
478 | /* Return maximum possible vectorization factor for the target. */ | |
479 | ||
9d805ed8 | 480 | poly_uint64 |
4954efd4 | 481 | omp_max_vf (void) |
482 | { | |
483 | if (!optimize | |
484 | || optimize_debug | |
485 | || !flag_tree_loop_optimize | |
486 | || (!flag_tree_loop_vectorize | |
52e94bf8 | 487 | && global_options_set.x_flag_tree_loop_vectorize)) |
4954efd4 | 488 | return 1; |
489 | ||
3106770a | 490 | auto_vector_sizes sizes; |
e7419472 | 491 | targetm.vectorize.autovectorize_vector_sizes (&sizes, true); |
3106770a | 492 | if (!sizes.is_empty ()) |
4954efd4 | 493 | { |
3106770a | 494 | poly_uint64 vf = 0; |
495 | for (unsigned int i = 0; i < sizes.length (); ++i) | |
496 | vf = ordered_max (vf, sizes[i]); | |
497 | return vf; | |
4954efd4 | 498 | } |
3106770a | 499 | |
500 | machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); | |
501 | if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) | |
502 | return GET_MODE_NUNITS (vqimode); | |
503 | ||
504 | return 1; | |
4954efd4 | 505 | } |
506 | ||
507 | /* Return maximum SIMT width if offloading may target SIMT hardware. */ | |
508 | ||
509 | int | |
510 | omp_max_simt_vf (void) | |
511 | { | |
512 | if (!optimize) | |
513 | return 0; | |
514 | if (ENABLE_OFFLOADING) | |
7c6746c9 | 515 | for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) |
4954efd4 | 516 | { |
517 | if (!strncmp (c, "nvptx", strlen ("nvptx"))) | |
518 | return 32; | |
519 | else if ((c = strchr (c, ','))) | |
520 | c++; | |
521 | } | |
522 | return 0; | |
523 | } | |
524 | ||
525 | /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK | |
526 | macro on gomp-constants.h. We do not check for overflow. */ | |
527 | ||
528 | tree | |
529 | oacc_launch_pack (unsigned code, tree device, unsigned op) | |
530 | { | |
531 | tree res; | |
532 | ||
533 | res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); | |
534 | if (device) | |
535 | { | |
536 | device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, | |
537 | device, build_int_cst (unsigned_type_node, | |
538 | GOMP_LAUNCH_DEVICE_SHIFT)); | |
539 | res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); | |
540 | } | |
541 | return res; | |
542 | } | |
543 | ||
544 | /* FIXME: What is the following comment for? */ | |
545 | /* Look for compute grid dimension clauses and convert to an attribute | |
546 | attached to FN. This permits the target-side code to (a) massage | |
547 | the dimensions, (b) emit that data and (c) optimize. Non-constant | |
548 | dimensions are pushed onto ARGS. | |
549 | ||
550 | The attribute value is a TREE_LIST. A set of dimensions is | |
551 | represented as a list of INTEGER_CST. Those that are runtime | |
552 | exprs are represented as an INTEGER_CST of zero. | |
553 | ||
7c6746c9 | 554 | TODO: Normally the attribute will just contain a single such list. If |
4954efd4 | 555 | however it contains a list of lists, this will represent the use of |
556 | device_type. Each member of the outer list is an assoc list of | |
557 | dimensions, keyed by the device type. The first entry will be the | |
558 | default. Well, that's the plan. */ | |
559 | ||
560 | /* Replace any existing oacc fn attribute with updated dimensions. */ | |
561 | ||
1d740b07 | 562 | /* Variant working on a list of attributes. */ |
563 | ||
564 | tree | |
565 | oacc_replace_fn_attrib_attr (tree attribs, tree dims) | |
4954efd4 | 566 | { |
567 | tree ident = get_identifier (OACC_FN_ATTRIB); | |
4954efd4 | 568 | |
569 | /* If we happen to be present as the first attrib, drop it. */ | |
570 | if (attribs && TREE_PURPOSE (attribs) == ident) | |
571 | attribs = TREE_CHAIN (attribs); | |
1d740b07 | 572 | return tree_cons (ident, dims, attribs); |
573 | } | |
574 | ||
575 | /* Variant working on a function decl. */ | |
576 | ||
577 | void | |
578 | oacc_replace_fn_attrib (tree fn, tree dims) | |
579 | { | |
580 | DECL_ATTRIBUTES (fn) | |
581 | = oacc_replace_fn_attrib_attr (DECL_ATTRIBUTES (fn), dims); | |
4954efd4 | 582 | } |
583 | ||
584 | /* Scan CLAUSES for launch dimensions and attach them to the oacc | |
585 | function attribute. Push any that are non-constant onto the ARGS | |
1d3ea8fc | 586 | list, along with an appropriate GOMP_LAUNCH_DIM tag. */ |
4954efd4 | 587 | |
588 | void | |
1d3ea8fc | 589 | oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args) |
4954efd4 | 590 | { |
591 | /* Must match GOMP_DIM ordering. */ | |
592 | static const omp_clause_code ids[] | |
593 | = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, | |
594 | OMP_CLAUSE_VECTOR_LENGTH }; | |
595 | unsigned ix; | |
596 | tree dims[GOMP_DIM_MAX]; | |
597 | ||
598 | tree attr = NULL_TREE; | |
599 | unsigned non_const = 0; | |
600 | ||
601 | for (ix = GOMP_DIM_MAX; ix--;) | |
602 | { | |
603 | tree clause = omp_find_clause (clauses, ids[ix]); | |
604 | tree dim = NULL_TREE; | |
605 | ||
606 | if (clause) | |
607 | dim = OMP_CLAUSE_EXPR (clause, ids[ix]); | |
608 | dims[ix] = dim; | |
609 | if (dim && TREE_CODE (dim) != INTEGER_CST) | |
610 | { | |
611 | dim = integer_zero_node; | |
612 | non_const |= GOMP_DIM_MASK (ix); | |
613 | } | |
614 | attr = tree_cons (NULL_TREE, dim, attr); | |
4954efd4 | 615 | } |
616 | ||
617 | oacc_replace_fn_attrib (fn, attr); | |
618 | ||
619 | if (non_const) | |
620 | { | |
621 | /* Push a dynamic argument set. */ | |
622 | args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, | |
623 | NULL_TREE, non_const)); | |
624 | for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) | |
625 | if (non_const & GOMP_DIM_MASK (ix)) | |
626 | args->safe_push (dims[ix]); | |
627 | } | |
628 | } | |
629 | ||
5f7ea2ee | 630 | /* Verify OpenACC routine clauses. |
631 | ||
33dacef9 | 632 | Returns 0 if FNDECL should be marked with an OpenACC 'routine' directive, 1 |
633 | if it has already been marked in compatible way, and -1 if incompatible. | |
5f7ea2ee | 634 | Upon returning, the chain of clauses will contain exactly one clause |
635 | specifying the level of parallelism. */ | |
636 | ||
33dacef9 | 637 | int |
638 | oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, | |
639 | const char *routine_str) | |
5f7ea2ee | 640 | { |
641 | tree c_level = NULL_TREE; | |
642 | tree c_p = NULL_TREE; | |
643 | for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c)) | |
644 | switch (OMP_CLAUSE_CODE (c)) | |
645 | { | |
646 | case OMP_CLAUSE_GANG: | |
647 | case OMP_CLAUSE_WORKER: | |
648 | case OMP_CLAUSE_VECTOR: | |
649 | case OMP_CLAUSE_SEQ: | |
650 | if (c_level == NULL_TREE) | |
651 | c_level = c; | |
652 | else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_CODE (c_level)) | |
653 | { | |
654 | /* This has already been diagnosed in the front ends. */ | |
655 | /* Drop the duplicate clause. */ | |
656 | gcc_checking_assert (c_p != NULL_TREE); | |
657 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); | |
658 | c = c_p; | |
659 | } | |
660 | else | |
661 | { | |
662 | error_at (OMP_CLAUSE_LOCATION (c), | |
663 | "%qs specifies a conflicting level of parallelism", | |
664 | omp_clause_code_name[OMP_CLAUSE_CODE (c)]); | |
665 | inform (OMP_CLAUSE_LOCATION (c_level), | |
666 | "... to the previous %qs clause here", | |
667 | omp_clause_code_name[OMP_CLAUSE_CODE (c_level)]); | |
668 | /* Drop the conflicting clause. */ | |
669 | gcc_checking_assert (c_p != NULL_TREE); | |
670 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); | |
671 | c = c_p; | |
672 | } | |
673 | break; | |
674 | default: | |
675 | gcc_unreachable (); | |
676 | } | |
677 | if (c_level == NULL_TREE) | |
678 | { | |
679 | /* Default to an implicit 'seq' clause. */ | |
680 | c_level = build_omp_clause (loc, OMP_CLAUSE_SEQ); | |
681 | OMP_CLAUSE_CHAIN (c_level) = *clauses; | |
682 | *clauses = c_level; | |
683 | } | |
33dacef9 | 684 | /* In *clauses, we now have exactly one clause specifying the level of |
685 | parallelism. */ | |
686 | ||
687 | tree attr | |
688 | = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)); | |
689 | if (attr != NULL_TREE) | |
690 | { | |
691 | /* If a "#pragma acc routine" has already been applied, just verify | |
692 | this one for compatibility. */ | |
693 | /* Collect previous directive's clauses. */ | |
694 | tree c_level_p = NULL_TREE; | |
695 | for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c)) | |
696 | switch (OMP_CLAUSE_CODE (c)) | |
697 | { | |
698 | case OMP_CLAUSE_GANG: | |
699 | case OMP_CLAUSE_WORKER: | |
700 | case OMP_CLAUSE_VECTOR: | |
701 | case OMP_CLAUSE_SEQ: | |
702 | gcc_checking_assert (c_level_p == NULL_TREE); | |
703 | c_level_p = c; | |
704 | break; | |
705 | default: | |
706 | gcc_unreachable (); | |
707 | } | |
708 | gcc_checking_assert (c_level_p != NULL_TREE); | |
709 | /* ..., and compare to current directive's, which we've already collected | |
710 | above. */ | |
711 | tree c_diag; | |
712 | tree c_diag_p; | |
713 | /* Matching level of parallelism? */ | |
714 | if (OMP_CLAUSE_CODE (c_level) != OMP_CLAUSE_CODE (c_level_p)) | |
715 | { | |
716 | c_diag = c_level; | |
717 | c_diag_p = c_level_p; | |
718 | goto incompatible; | |
719 | } | |
720 | /* Compatible. */ | |
721 | return 1; | |
722 | ||
723 | incompatible: | |
724 | if (c_diag != NULL_TREE) | |
725 | error_at (OMP_CLAUSE_LOCATION (c_diag), | |
726 | "incompatible %qs clause when applying" | |
727 | " %<%s%> to %qD, which has already been" | |
728 | " marked with an OpenACC 'routine' directive", | |
729 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)], | |
730 | routine_str, fndecl); | |
731 | else if (c_diag_p != NULL_TREE) | |
732 | error_at (loc, | |
733 | "missing %qs clause when applying" | |
734 | " %<%s%> to %qD, which has already been" | |
735 | " marked with an OpenACC 'routine' directive", | |
736 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)], | |
737 | routine_str, fndecl); | |
738 | else | |
739 | gcc_unreachable (); | |
740 | if (c_diag_p != NULL_TREE) | |
741 | inform (OMP_CLAUSE_LOCATION (c_diag_p), | |
742 | "... with %qs clause here", | |
743 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)]); | |
744 | else | |
745 | { | |
746 | /* In the front ends, we don't preserve location information for the | |
747 | OpenACC routine directive itself. However, that of c_level_p | |
748 | should be close. */ | |
749 | location_t loc_routine = OMP_CLAUSE_LOCATION (c_level_p); | |
750 | inform (loc_routine, "... without %qs clause near to here", | |
751 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)]); | |
752 | } | |
753 | /* Incompatible. */ | |
754 | return -1; | |
755 | } | |
756 | ||
757 | return 0; | |
5f7ea2ee | 758 | } |
759 | ||
760 | /* Process the OpenACC 'routine' directive clauses to generate an attribute | |
761 | for the level of parallelism. All dimensions have a size of zero | |
4954efd4 | 762 | (dynamic). TREE_PURPOSE is set to indicate whether that dimension |
763 | can have a loop partitioned on it. non-zero indicates | |
764 | yes, zero indicates no. By construction once a non-zero has been | |
765 | reached, further inner dimensions must also be non-zero. We set | |
766 | TREE_VALUE to zero for the dimensions that may be partitioned and | |
767 | 1 for the other ones -- if a loop is (erroneously) spawned at | |
768 | an outer level, we don't want to try and partition it. */ | |
769 | ||
770 | tree | |
771 | oacc_build_routine_dims (tree clauses) | |
772 | { | |
773 | /* Must match GOMP_DIM ordering. */ | |
7c6746c9 | 774 | static const omp_clause_code ids[] |
775 | = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; | |
4954efd4 | 776 | int ix; |
777 | int level = -1; | |
778 | ||
779 | for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) | |
780 | for (ix = GOMP_DIM_MAX + 1; ix--;) | |
781 | if (OMP_CLAUSE_CODE (clauses) == ids[ix]) | |
782 | { | |
4954efd4 | 783 | level = ix; |
784 | break; | |
785 | } | |
5f7ea2ee | 786 | gcc_checking_assert (level >= 0); |
4954efd4 | 787 | |
788 | tree dims = NULL_TREE; | |
789 | ||
790 | for (ix = GOMP_DIM_MAX; ix--;) | |
791 | dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), | |
792 | build_int_cst (integer_type_node, ix < level), dims); | |
793 | ||
794 | return dims; | |
795 | } | |
796 | ||
797 | /* Retrieve the oacc function attrib and return it. Non-oacc | |
798 | functions will return NULL. */ | |
799 | ||
800 | tree | |
801 | oacc_get_fn_attrib (tree fn) | |
802 | { | |
803 | return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); | |
804 | } | |
805 | ||
c4b26cae | 806 | /* Return true if FN is an OpenMP or OpenACC offloading function. */ |
807 | ||
808 | bool | |
809 | offloading_function_p (tree fn) | |
810 | { | |
811 | tree attrs = DECL_ATTRIBUTES (fn); | |
812 | return (lookup_attribute ("omp declare target", attrs) | |
813 | || lookup_attribute ("omp target entrypoint", attrs)); | |
814 | } | |
815 | ||
4954efd4 | 816 | /* Extract an oacc execution dimension from FN. FN must be an |
817 | offloaded function or routine that has already had its execution | |
818 | dimensions lowered to the target-specific values. */ | |
819 | ||
820 | int | |
821 | oacc_get_fn_dim_size (tree fn, int axis) | |
822 | { | |
823 | tree attrs = oacc_get_fn_attrib (fn); | |
824 | ||
825 | gcc_assert (axis < GOMP_DIM_MAX); | |
826 | ||
827 | tree dims = TREE_VALUE (attrs); | |
828 | while (axis--) | |
829 | dims = TREE_CHAIN (dims); | |
830 | ||
831 | int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); | |
832 | ||
833 | return size; | |
834 | } | |
835 | ||
836 | /* Extract the dimension axis from an IFN_GOACC_DIM_POS or | |
837 | IFN_GOACC_DIM_SIZE call. */ | |
838 | ||
839 | int | |
840 | oacc_get_ifn_dim_arg (const gimple *stmt) | |
841 | { | |
842 | gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE | |
843 | || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); | |
844 | tree arg = gimple_call_arg (stmt, 0); | |
845 | HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); | |
846 | ||
847 | gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); | |
848 | return (int) axis; | |
849 | } |