]>
Commit | Line | Data |
---|---|---|
4954efd4 | 1 | /* General types and functions that are uselful for processing of OpenMP, |
2 | OpenACC and similar directivers at various stages of compilation. | |
3 | ||
fbd26352 | 4 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
4954efd4 | 5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* Find an OMP clause of type KIND within CLAUSES. */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "system.h" | |
26 | #include "coretypes.h" | |
27 | #include "backend.h" | |
28 | #include "target.h" | |
29 | #include "tree.h" | |
30 | #include "gimple.h" | |
31 | #include "ssa.h" | |
32 | #include "diagnostic-core.h" | |
33 | #include "fold-const.h" | |
34 | #include "langhooks.h" | |
35 | #include "omp-general.h" | |
30a86690 | 36 | #include "stringpool.h" |
37 | #include "attribs.h" | |
4954efd4 | 38 | |
7e5a76c8 | 39 | enum omp_requires omp_requires_mask; |
40 | ||
4954efd4 | 41 | tree |
42 | omp_find_clause (tree clauses, enum omp_clause_code kind) | |
43 | { | |
44 | for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) | |
45 | if (OMP_CLAUSE_CODE (clauses) == kind) | |
46 | return clauses; | |
47 | ||
48 | return NULL_TREE; | |
49 | } | |
50 | ||
51 | /* Return true if DECL is a reference type. */ | |
52 | ||
53 | bool | |
54 | omp_is_reference (tree decl) | |
55 | { | |
56 | return lang_hooks.decls.omp_privatize_by_reference (decl); | |
57 | } | |
58 | ||
4226cb1d | 59 | /* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR, |
60 | given that V is the loop index variable and STEP is loop step. */ | |
4954efd4 | 61 | |
62 | void | |
4226cb1d | 63 | omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2, |
64 | tree v, tree step) | |
4954efd4 | 65 | { |
66 | switch (*cond_code) | |
67 | { | |
68 | case LT_EXPR: | |
69 | case GT_EXPR: | |
4226cb1d | 70 | break; |
71 | ||
4954efd4 | 72 | case NE_EXPR: |
4226cb1d | 73 | gcc_assert (TREE_CODE (step) == INTEGER_CST); |
74 | if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE) | |
75 | { | |
76 | if (integer_onep (step)) | |
77 | *cond_code = LT_EXPR; | |
78 | else | |
79 | { | |
80 | gcc_assert (integer_minus_onep (step)); | |
81 | *cond_code = GT_EXPR; | |
82 | } | |
83 | } | |
84 | else | |
85 | { | |
86 | tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v))); | |
87 | gcc_assert (TREE_CODE (unit) == INTEGER_CST); | |
88 | if (tree_int_cst_equal (unit, step)) | |
89 | *cond_code = LT_EXPR; | |
90 | else | |
91 | { | |
92 | gcc_assert (wi::neg (wi::to_widest (unit)) | |
93 | == wi::to_widest (step)); | |
94 | *cond_code = GT_EXPR; | |
95 | } | |
96 | } | |
97 | ||
4954efd4 | 98 | break; |
4226cb1d | 99 | |
4954efd4 | 100 | case LE_EXPR: |
101 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) | |
102 | *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); | |
103 | else | |
104 | *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, | |
105 | build_int_cst (TREE_TYPE (*n2), 1)); | |
106 | *cond_code = LT_EXPR; | |
107 | break; | |
108 | case GE_EXPR: | |
109 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) | |
110 | *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); | |
111 | else | |
112 | *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, | |
113 | build_int_cst (TREE_TYPE (*n2), 1)); | |
114 | *cond_code = GT_EXPR; | |
115 | break; | |
116 | default: | |
117 | gcc_unreachable (); | |
118 | } | |
119 | } | |
120 | ||
121 | /* Return the looping step from INCR, extracted from the step of a gimple omp | |
122 | for statement. */ | |
123 | ||
124 | tree | |
125 | omp_get_for_step_from_incr (location_t loc, tree incr) | |
126 | { | |
127 | tree step; | |
128 | switch (TREE_CODE (incr)) | |
129 | { | |
130 | case PLUS_EXPR: | |
131 | step = TREE_OPERAND (incr, 1); | |
132 | break; | |
133 | case POINTER_PLUS_EXPR: | |
134 | step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); | |
135 | break; | |
136 | case MINUS_EXPR: | |
137 | step = TREE_OPERAND (incr, 1); | |
138 | step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); | |
139 | break; | |
140 | default: | |
141 | gcc_unreachable (); | |
142 | } | |
143 | return step; | |
144 | } | |
145 | ||
146 | /* Extract the header elements of parallel loop FOR_STMT and store | |
147 | them into *FD. */ | |
148 | ||
149 | void | |
150 | omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, | |
151 | struct omp_for_data_loop *loops) | |
152 | { | |
153 | tree t, var, *collapse_iter, *collapse_count; | |
154 | tree count = NULL_TREE, iter_type = long_integer_type_node; | |
155 | struct omp_for_data_loop *loop; | |
156 | int i; | |
157 | struct omp_for_data_loop dummy_loop; | |
158 | location_t loc = gimple_location (for_stmt); | |
159 | bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD; | |
160 | bool distribute = gimple_omp_for_kind (for_stmt) | |
161 | == GF_OMP_FOR_KIND_DISTRIBUTE; | |
162 | bool taskloop = gimple_omp_for_kind (for_stmt) | |
163 | == GF_OMP_FOR_KIND_TASKLOOP; | |
164 | tree iterv, countv; | |
165 | ||
166 | fd->for_stmt = for_stmt; | |
167 | fd->pre = NULL; | |
4954efd4 | 168 | fd->have_nowait = distribute || simd; |
169 | fd->have_ordered = false; | |
7e5a76c8 | 170 | fd->have_reductemp = false; |
48152aa2 | 171 | fd->have_pointer_condtemp = false; |
9a1d892b | 172 | fd->lastprivate_conditional = 0; |
719a7570 | 173 | fd->tiling = NULL_TREE; |
4954efd4 | 174 | fd->collapse = 1; |
175 | fd->ordered = 0; | |
176 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; | |
177 | fd->sched_modifiers = 0; | |
178 | fd->chunk_size = NULL_TREE; | |
179 | fd->simd_schedule = false; | |
4954efd4 | 180 | collapse_iter = NULL; |
181 | collapse_count = NULL; | |
182 | ||
183 | for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) | |
184 | switch (OMP_CLAUSE_CODE (t)) | |
185 | { | |
186 | case OMP_CLAUSE_NOWAIT: | |
187 | fd->have_nowait = true; | |
188 | break; | |
189 | case OMP_CLAUSE_ORDERED: | |
190 | fd->have_ordered = true; | |
191 | if (OMP_CLAUSE_ORDERED_EXPR (t)) | |
192 | fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); | |
193 | break; | |
194 | case OMP_CLAUSE_SCHEDULE: | |
195 | gcc_assert (!distribute && !taskloop); | |
196 | fd->sched_kind | |
197 | = (enum omp_clause_schedule_kind) | |
198 | (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); | |
199 | fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) | |
200 | & ~OMP_CLAUSE_SCHEDULE_MASK); | |
201 | fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); | |
202 | fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); | |
203 | break; | |
204 | case OMP_CLAUSE_DIST_SCHEDULE: | |
205 | gcc_assert (distribute); | |
206 | fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t); | |
207 | break; | |
208 | case OMP_CLAUSE_COLLAPSE: | |
209 | fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); | |
210 | if (fd->collapse > 1) | |
211 | { | |
212 | collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); | |
213 | collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); | |
214 | } | |
215 | break; | |
719a7570 | 216 | case OMP_CLAUSE_TILE: |
217 | fd->tiling = OMP_CLAUSE_TILE_LIST (t); | |
218 | fd->collapse = list_length (fd->tiling); | |
219 | gcc_assert (fd->collapse); | |
220 | collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t); | |
221 | collapse_count = &OMP_CLAUSE_TILE_COUNT (t); | |
222 | break; | |
7e5a76c8 | 223 | case OMP_CLAUSE__REDUCTEMP_: |
224 | fd->have_reductemp = true; | |
9a1d892b | 225 | break; |
226 | case OMP_CLAUSE_LASTPRIVATE: | |
227 | if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (t)) | |
228 | fd->lastprivate_conditional++; | |
229 | break; | |
48152aa2 | 230 | case OMP_CLAUSE__CONDTEMP_: |
231 | if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) | |
232 | fd->have_pointer_condtemp = true; | |
233 | break; | |
4954efd4 | 234 | default: |
235 | break; | |
236 | } | |
719a7570 | 237 | |
238 | if (fd->collapse > 1 || fd->tiling) | |
239 | fd->loops = loops; | |
240 | else | |
241 | fd->loops = &fd->loop; | |
242 | ||
4954efd4 | 243 | if (fd->ordered && fd->collapse == 1 && loops != NULL) |
244 | { | |
245 | fd->loops = loops; | |
246 | iterv = NULL_TREE; | |
247 | countv = NULL_TREE; | |
248 | collapse_iter = &iterv; | |
249 | collapse_count = &countv; | |
250 | } | |
251 | ||
252 | /* FIXME: for now map schedule(auto) to schedule(static). | |
253 | There should be analysis to determine whether all iterations | |
254 | are approximately the same amount of work (then schedule(static) | |
255 | is best) or if it varies (then schedule(dynamic,N) is better). */ | |
256 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) | |
257 | { | |
258 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; | |
259 | gcc_assert (fd->chunk_size == NULL); | |
260 | } | |
719a7570 | 261 | gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL); |
4954efd4 | 262 | if (taskloop) |
263 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; | |
264 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) | |
265 | gcc_assert (fd->chunk_size == NULL); | |
266 | else if (fd->chunk_size == NULL) | |
267 | { | |
268 | /* We only need to compute a default chunk size for ordered | |
269 | static loops and dynamic loops. */ | |
270 | if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC | |
271 | || fd->have_ordered) | |
272 | fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) | |
273 | ? integer_zero_node : integer_one_node; | |
274 | } | |
275 | ||
276 | int cnt = fd->ordered ? fd->ordered : fd->collapse; | |
277 | for (i = 0; i < cnt; i++) | |
278 | { | |
719a7570 | 279 | if (i == 0 |
280 | && fd->collapse == 1 | |
281 | && !fd->tiling | |
282 | && (fd->ordered == 0 || loops == NULL)) | |
4954efd4 | 283 | loop = &fd->loop; |
284 | else if (loops != NULL) | |
285 | loop = loops + i; | |
286 | else | |
287 | loop = &dummy_loop; | |
288 | ||
289 | loop->v = gimple_omp_for_index (for_stmt, i); | |
290 | gcc_assert (SSA_VAR_P (loop->v)); | |
291 | gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE | |
292 | || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); | |
293 | var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v; | |
294 | loop->n1 = gimple_omp_for_initial (for_stmt, i); | |
295 | ||
296 | loop->cond_code = gimple_omp_for_cond (for_stmt, i); | |
297 | loop->n2 = gimple_omp_for_final (for_stmt, i); | |
7e5a76c8 | 298 | gcc_assert (loop->cond_code != NE_EXPR |
299 | || (gimple_omp_for_kind (for_stmt) | |
300 | != GF_OMP_FOR_KIND_OACC_LOOP)); | |
4954efd4 | 301 | |
302 | t = gimple_omp_for_incr (for_stmt, i); | |
303 | gcc_assert (TREE_OPERAND (t, 0) == var); | |
304 | loop->step = omp_get_for_step_from_incr (loc, t); | |
305 | ||
4226cb1d | 306 | omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2, loop->v, |
307 | loop->step); | |
7e5a76c8 | 308 | |
4954efd4 | 309 | if (simd |
310 | || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC | |
311 | && !fd->have_ordered)) | |
312 | { | |
719a7570 | 313 | if (fd->collapse == 1 && !fd->tiling) |
4954efd4 | 314 | iter_type = TREE_TYPE (loop->v); |
315 | else if (i == 0 | |
316 | || TYPE_PRECISION (iter_type) | |
317 | < TYPE_PRECISION (TREE_TYPE (loop->v))) | |
318 | iter_type | |
319 | = build_nonstandard_integer_type | |
320 | (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); | |
321 | } | |
322 | else if (iter_type != long_long_unsigned_type_node) | |
323 | { | |
324 | if (POINTER_TYPE_P (TREE_TYPE (loop->v))) | |
325 | iter_type = long_long_unsigned_type_node; | |
326 | else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) | |
327 | && TYPE_PRECISION (TREE_TYPE (loop->v)) | |
328 | >= TYPE_PRECISION (iter_type)) | |
329 | { | |
330 | tree n; | |
331 | ||
332 | if (loop->cond_code == LT_EXPR) | |
7e5a76c8 | 333 | n = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
334 | loop->n2, loop->step); | |
4954efd4 | 335 | else |
336 | n = loop->n1; | |
337 | if (TREE_CODE (n) != INTEGER_CST | |
338 | || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) | |
339 | iter_type = long_long_unsigned_type_node; | |
340 | } | |
341 | else if (TYPE_PRECISION (TREE_TYPE (loop->v)) | |
342 | > TYPE_PRECISION (iter_type)) | |
343 | { | |
344 | tree n1, n2; | |
345 | ||
346 | if (loop->cond_code == LT_EXPR) | |
347 | { | |
348 | n1 = loop->n1; | |
7e5a76c8 | 349 | n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
350 | loop->n2, loop->step); | |
4954efd4 | 351 | } |
352 | else | |
353 | { | |
7e5a76c8 | 354 | n1 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (loop->v), |
355 | loop->n2, loop->step); | |
4954efd4 | 356 | n2 = loop->n1; |
357 | } | |
358 | if (TREE_CODE (n1) != INTEGER_CST | |
359 | || TREE_CODE (n2) != INTEGER_CST | |
360 | || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) | |
361 | || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type))) | |
362 | iter_type = long_long_unsigned_type_node; | |
363 | } | |
364 | } | |
365 | ||
366 | if (i >= fd->collapse) | |
367 | continue; | |
368 | ||
369 | if (collapse_count && *collapse_count == NULL) | |
370 | { | |
371 | t = fold_binary (loop->cond_code, boolean_type_node, | |
372 | fold_convert (TREE_TYPE (loop->v), loop->n1), | |
373 | fold_convert (TREE_TYPE (loop->v), loop->n2)); | |
374 | if (t && integer_zerop (t)) | |
375 | count = build_zero_cst (long_long_unsigned_type_node); | |
376 | else if ((i == 0 || count != NULL_TREE) | |
377 | && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE | |
378 | && TREE_CONSTANT (loop->n1) | |
379 | && TREE_CONSTANT (loop->n2) | |
380 | && TREE_CODE (loop->step) == INTEGER_CST) | |
381 | { | |
382 | tree itype = TREE_TYPE (loop->v); | |
383 | ||
384 | if (POINTER_TYPE_P (itype)) | |
385 | itype = signed_type_for (itype); | |
386 | t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); | |
7e5a76c8 | 387 | t = fold_build2_loc (loc, PLUS_EXPR, itype, |
388 | fold_convert_loc (loc, itype, loop->step), | |
389 | t); | |
4954efd4 | 390 | t = fold_build2_loc (loc, PLUS_EXPR, itype, t, |
7e5a76c8 | 391 | fold_convert_loc (loc, itype, loop->n2)); |
4954efd4 | 392 | t = fold_build2_loc (loc, MINUS_EXPR, itype, t, |
7e5a76c8 | 393 | fold_convert_loc (loc, itype, loop->n1)); |
4954efd4 | 394 | if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) |
7e5a76c8 | 395 | { |
396 | tree step = fold_convert_loc (loc, itype, loop->step); | |
397 | t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, | |
398 | fold_build1_loc (loc, NEGATE_EXPR, | |
399 | itype, t), | |
400 | fold_build1_loc (loc, NEGATE_EXPR, | |
401 | itype, step)); | |
402 | } | |
4954efd4 | 403 | else |
404 | t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, | |
7e5a76c8 | 405 | fold_convert_loc (loc, itype, |
406 | loop->step)); | |
4954efd4 | 407 | t = fold_convert_loc (loc, long_long_unsigned_type_node, t); |
408 | if (count != NULL_TREE) | |
7e5a76c8 | 409 | count = fold_build2_loc (loc, MULT_EXPR, |
410 | long_long_unsigned_type_node, | |
411 | count, t); | |
4954efd4 | 412 | else |
413 | count = t; | |
414 | if (TREE_CODE (count) != INTEGER_CST) | |
415 | count = NULL_TREE; | |
416 | } | |
417 | else if (count && !integer_zerop (count)) | |
418 | count = NULL_TREE; | |
419 | } | |
420 | } | |
421 | ||
422 | if (count | |
423 | && !simd | |
424 | && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC | |
425 | || fd->have_ordered)) | |
426 | { | |
427 | if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) | |
428 | iter_type = long_long_unsigned_type_node; | |
429 | else | |
430 | iter_type = long_integer_type_node; | |
431 | } | |
432 | else if (collapse_iter && *collapse_iter != NULL) | |
433 | iter_type = TREE_TYPE (*collapse_iter); | |
434 | fd->iter_type = iter_type; | |
435 | if (collapse_iter && *collapse_iter == NULL) | |
436 | *collapse_iter = create_tmp_var (iter_type, ".iter"); | |
437 | if (collapse_count && *collapse_count == NULL) | |
438 | { | |
439 | if (count) | |
440 | *collapse_count = fold_convert_loc (loc, iter_type, count); | |
441 | else | |
442 | *collapse_count = create_tmp_var (iter_type, ".count"); | |
443 | } | |
444 | ||
719a7570 | 445 | if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops)) |
4954efd4 | 446 | { |
447 | fd->loop.v = *collapse_iter; | |
448 | fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); | |
449 | fd->loop.n2 = *collapse_count; | |
450 | fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); | |
451 | fd->loop.cond_code = LT_EXPR; | |
452 | } | |
453 | else if (loops) | |
454 | loops[0] = fd->loop; | |
455 | } | |
456 | ||
457 | /* Build a call to GOMP_barrier. */ | |
458 | ||
459 | gimple * | |
460 | omp_build_barrier (tree lhs) | |
461 | { | |
462 | tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL | |
463 | : BUILT_IN_GOMP_BARRIER); | |
464 | gcall *g = gimple_build_call (fndecl, 0); | |
465 | if (lhs) | |
466 | gimple_call_set_lhs (g, lhs); | |
467 | return g; | |
468 | } | |
469 | ||
470 | /* Return maximum possible vectorization factor for the target. */ | |
471 | ||
9d805ed8 | 472 | poly_uint64 |
4954efd4 | 473 | omp_max_vf (void) |
474 | { | |
475 | if (!optimize | |
476 | || optimize_debug | |
477 | || !flag_tree_loop_optimize | |
478 | || (!flag_tree_loop_vectorize | |
52e94bf8 | 479 | && global_options_set.x_flag_tree_loop_vectorize)) |
4954efd4 | 480 | return 1; |
481 | ||
3106770a | 482 | auto_vector_sizes sizes; |
e7419472 | 483 | targetm.vectorize.autovectorize_vector_sizes (&sizes, true); |
3106770a | 484 | if (!sizes.is_empty ()) |
4954efd4 | 485 | { |
3106770a | 486 | poly_uint64 vf = 0; |
487 | for (unsigned int i = 0; i < sizes.length (); ++i) | |
488 | vf = ordered_max (vf, sizes[i]); | |
489 | return vf; | |
4954efd4 | 490 | } |
3106770a | 491 | |
492 | machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); | |
493 | if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) | |
494 | return GET_MODE_NUNITS (vqimode); | |
495 | ||
496 | return 1; | |
4954efd4 | 497 | } |
498 | ||
499 | /* Return maximum SIMT width if offloading may target SIMT hardware. */ | |
500 | ||
501 | int | |
502 | omp_max_simt_vf (void) | |
503 | { | |
504 | if (!optimize) | |
505 | return 0; | |
506 | if (ENABLE_OFFLOADING) | |
7c6746c9 | 507 | for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) |
4954efd4 | 508 | { |
509 | if (!strncmp (c, "nvptx", strlen ("nvptx"))) | |
510 | return 32; | |
511 | else if ((c = strchr (c, ','))) | |
512 | c++; | |
513 | } | |
514 | return 0; | |
515 | } | |
516 | ||
517 | /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK | |
518 | macro on gomp-constants.h. We do not check for overflow. */ | |
519 | ||
520 | tree | |
521 | oacc_launch_pack (unsigned code, tree device, unsigned op) | |
522 | { | |
523 | tree res; | |
524 | ||
525 | res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); | |
526 | if (device) | |
527 | { | |
528 | device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, | |
529 | device, build_int_cst (unsigned_type_node, | |
530 | GOMP_LAUNCH_DEVICE_SHIFT)); | |
531 | res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); | |
532 | } | |
533 | return res; | |
534 | } | |
535 | ||
536 | /* FIXME: What is the following comment for? */ | |
537 | /* Look for compute grid dimension clauses and convert to an attribute | |
538 | attached to FN. This permits the target-side code to (a) massage | |
539 | the dimensions, (b) emit that data and (c) optimize. Non-constant | |
540 | dimensions are pushed onto ARGS. | |
541 | ||
542 | The attribute value is a TREE_LIST. A set of dimensions is | |
543 | represented as a list of INTEGER_CST. Those that are runtime | |
544 | exprs are represented as an INTEGER_CST of zero. | |
545 | ||
7c6746c9 | 546 | TODO: Normally the attribute will just contain a single such list. If |
4954efd4 | 547 | however it contains a list of lists, this will represent the use of |
548 | device_type. Each member of the outer list is an assoc list of | |
549 | dimensions, keyed by the device type. The first entry will be the | |
550 | default. Well, that's the plan. */ | |
551 | ||
552 | /* Replace any existing oacc fn attribute with updated dimensions. */ | |
553 | ||
1d740b07 | 554 | /* Variant working on a list of attributes. */ |
555 | ||
556 | tree | |
557 | oacc_replace_fn_attrib_attr (tree attribs, tree dims) | |
4954efd4 | 558 | { |
559 | tree ident = get_identifier (OACC_FN_ATTRIB); | |
4954efd4 | 560 | |
561 | /* If we happen to be present as the first attrib, drop it. */ | |
562 | if (attribs && TREE_PURPOSE (attribs) == ident) | |
563 | attribs = TREE_CHAIN (attribs); | |
1d740b07 | 564 | return tree_cons (ident, dims, attribs); |
565 | } | |
566 | ||
567 | /* Variant working on a function decl. */ | |
568 | ||
569 | void | |
570 | oacc_replace_fn_attrib (tree fn, tree dims) | |
571 | { | |
572 | DECL_ATTRIBUTES (fn) | |
573 | = oacc_replace_fn_attrib_attr (DECL_ATTRIBUTES (fn), dims); | |
4954efd4 | 574 | } |
575 | ||
576 | /* Scan CLAUSES for launch dimensions and attach them to the oacc | |
577 | function attribute. Push any that are non-constant onto the ARGS | |
1d3ea8fc | 578 | list, along with an appropriate GOMP_LAUNCH_DIM tag. */ |
4954efd4 | 579 | |
580 | void | |
1d3ea8fc | 581 | oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args) |
4954efd4 | 582 | { |
583 | /* Must match GOMP_DIM ordering. */ | |
584 | static const omp_clause_code ids[] | |
585 | = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, | |
586 | OMP_CLAUSE_VECTOR_LENGTH }; | |
587 | unsigned ix; | |
588 | tree dims[GOMP_DIM_MAX]; | |
589 | ||
590 | tree attr = NULL_TREE; | |
591 | unsigned non_const = 0; | |
592 | ||
593 | for (ix = GOMP_DIM_MAX; ix--;) | |
594 | { | |
595 | tree clause = omp_find_clause (clauses, ids[ix]); | |
596 | tree dim = NULL_TREE; | |
597 | ||
598 | if (clause) | |
599 | dim = OMP_CLAUSE_EXPR (clause, ids[ix]); | |
600 | dims[ix] = dim; | |
601 | if (dim && TREE_CODE (dim) != INTEGER_CST) | |
602 | { | |
603 | dim = integer_zero_node; | |
604 | non_const |= GOMP_DIM_MASK (ix); | |
605 | } | |
606 | attr = tree_cons (NULL_TREE, dim, attr); | |
4954efd4 | 607 | } |
608 | ||
609 | oacc_replace_fn_attrib (fn, attr); | |
610 | ||
611 | if (non_const) | |
612 | { | |
613 | /* Push a dynamic argument set. */ | |
614 | args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, | |
615 | NULL_TREE, non_const)); | |
616 | for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) | |
617 | if (non_const & GOMP_DIM_MASK (ix)) | |
618 | args->safe_push (dims[ix]); | |
619 | } | |
620 | } | |
621 | ||
5f7ea2ee | 622 | /* Verify OpenACC routine clauses. |
623 | ||
33dacef9 | 624 | Returns 0 if FNDECL should be marked with an OpenACC 'routine' directive, 1 |
625 | if it has already been marked in compatible way, and -1 if incompatible. | |
5f7ea2ee | 626 | Upon returning, the chain of clauses will contain exactly one clause |
627 | specifying the level of parallelism. */ | |
628 | ||
33dacef9 | 629 | int |
630 | oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, | |
631 | const char *routine_str) | |
5f7ea2ee | 632 | { |
633 | tree c_level = NULL_TREE; | |
634 | tree c_p = NULL_TREE; | |
635 | for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c)) | |
636 | switch (OMP_CLAUSE_CODE (c)) | |
637 | { | |
638 | case OMP_CLAUSE_GANG: | |
639 | case OMP_CLAUSE_WORKER: | |
640 | case OMP_CLAUSE_VECTOR: | |
641 | case OMP_CLAUSE_SEQ: | |
642 | if (c_level == NULL_TREE) | |
643 | c_level = c; | |
644 | else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_CODE (c_level)) | |
645 | { | |
646 | /* This has already been diagnosed in the front ends. */ | |
647 | /* Drop the duplicate clause. */ | |
648 | gcc_checking_assert (c_p != NULL_TREE); | |
649 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); | |
650 | c = c_p; | |
651 | } | |
652 | else | |
653 | { | |
654 | error_at (OMP_CLAUSE_LOCATION (c), | |
655 | "%qs specifies a conflicting level of parallelism", | |
656 | omp_clause_code_name[OMP_CLAUSE_CODE (c)]); | |
657 | inform (OMP_CLAUSE_LOCATION (c_level), | |
658 | "... to the previous %qs clause here", | |
659 | omp_clause_code_name[OMP_CLAUSE_CODE (c_level)]); | |
660 | /* Drop the conflicting clause. */ | |
661 | gcc_checking_assert (c_p != NULL_TREE); | |
662 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); | |
663 | c = c_p; | |
664 | } | |
665 | break; | |
666 | default: | |
667 | gcc_unreachable (); | |
668 | } | |
669 | if (c_level == NULL_TREE) | |
670 | { | |
671 | /* Default to an implicit 'seq' clause. */ | |
672 | c_level = build_omp_clause (loc, OMP_CLAUSE_SEQ); | |
673 | OMP_CLAUSE_CHAIN (c_level) = *clauses; | |
674 | *clauses = c_level; | |
675 | } | |
33dacef9 | 676 | /* In *clauses, we now have exactly one clause specifying the level of |
677 | parallelism. */ | |
678 | ||
679 | tree attr | |
680 | = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)); | |
681 | if (attr != NULL_TREE) | |
682 | { | |
683 | /* If a "#pragma acc routine" has already been applied, just verify | |
684 | this one for compatibility. */ | |
685 | /* Collect previous directive's clauses. */ | |
686 | tree c_level_p = NULL_TREE; | |
687 | for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c)) | |
688 | switch (OMP_CLAUSE_CODE (c)) | |
689 | { | |
690 | case OMP_CLAUSE_GANG: | |
691 | case OMP_CLAUSE_WORKER: | |
692 | case OMP_CLAUSE_VECTOR: | |
693 | case OMP_CLAUSE_SEQ: | |
694 | gcc_checking_assert (c_level_p == NULL_TREE); | |
695 | c_level_p = c; | |
696 | break; | |
697 | default: | |
698 | gcc_unreachable (); | |
699 | } | |
700 | gcc_checking_assert (c_level_p != NULL_TREE); | |
701 | /* ..., and compare to current directive's, which we've already collected | |
702 | above. */ | |
703 | tree c_diag; | |
704 | tree c_diag_p; | |
705 | /* Matching level of parallelism? */ | |
706 | if (OMP_CLAUSE_CODE (c_level) != OMP_CLAUSE_CODE (c_level_p)) | |
707 | { | |
708 | c_diag = c_level; | |
709 | c_diag_p = c_level_p; | |
710 | goto incompatible; | |
711 | } | |
712 | /* Compatible. */ | |
713 | return 1; | |
714 | ||
715 | incompatible: | |
716 | if (c_diag != NULL_TREE) | |
717 | error_at (OMP_CLAUSE_LOCATION (c_diag), | |
718 | "incompatible %qs clause when applying" | |
719 | " %<%s%> to %qD, which has already been" | |
720 | " marked with an OpenACC 'routine' directive", | |
721 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)], | |
722 | routine_str, fndecl); | |
723 | else if (c_diag_p != NULL_TREE) | |
724 | error_at (loc, | |
725 | "missing %qs clause when applying" | |
726 | " %<%s%> to %qD, which has already been" | |
727 | " marked with an OpenACC 'routine' directive", | |
728 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)], | |
729 | routine_str, fndecl); | |
730 | else | |
731 | gcc_unreachable (); | |
732 | if (c_diag_p != NULL_TREE) | |
733 | inform (OMP_CLAUSE_LOCATION (c_diag_p), | |
734 | "... with %qs clause here", | |
735 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)]); | |
736 | else | |
737 | { | |
738 | /* In the front ends, we don't preserve location information for the | |
739 | OpenACC routine directive itself. However, that of c_level_p | |
740 | should be close. */ | |
741 | location_t loc_routine = OMP_CLAUSE_LOCATION (c_level_p); | |
742 | inform (loc_routine, "... without %qs clause near to here", | |
743 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)]); | |
744 | } | |
745 | /* Incompatible. */ | |
746 | return -1; | |
747 | } | |
748 | ||
749 | return 0; | |
5f7ea2ee | 750 | } |
751 | ||
752 | /* Process the OpenACC 'routine' directive clauses to generate an attribute | |
753 | for the level of parallelism. All dimensions have a size of zero | |
4954efd4 | 754 | (dynamic). TREE_PURPOSE is set to indicate whether that dimension |
755 | can have a loop partitioned on it. non-zero indicates | |
756 | yes, zero indicates no. By construction once a non-zero has been | |
757 | reached, further inner dimensions must also be non-zero. We set | |
758 | TREE_VALUE to zero for the dimensions that may be partitioned and | |
759 | 1 for the other ones -- if a loop is (erroneously) spawned at | |
760 | an outer level, we don't want to try and partition it. */ | |
761 | ||
762 | tree | |
763 | oacc_build_routine_dims (tree clauses) | |
764 | { | |
765 | /* Must match GOMP_DIM ordering. */ | |
7c6746c9 | 766 | static const omp_clause_code ids[] |
767 | = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; | |
4954efd4 | 768 | int ix; |
769 | int level = -1; | |
770 | ||
771 | for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) | |
772 | for (ix = GOMP_DIM_MAX + 1; ix--;) | |
773 | if (OMP_CLAUSE_CODE (clauses) == ids[ix]) | |
774 | { | |
4954efd4 | 775 | level = ix; |
776 | break; | |
777 | } | |
5f7ea2ee | 778 | gcc_checking_assert (level >= 0); |
4954efd4 | 779 | |
780 | tree dims = NULL_TREE; | |
781 | ||
782 | for (ix = GOMP_DIM_MAX; ix--;) | |
783 | dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), | |
784 | build_int_cst (integer_type_node, ix < level), dims); | |
785 | ||
786 | return dims; | |
787 | } | |
788 | ||
789 | /* Retrieve the oacc function attrib and return it. Non-oacc | |
790 | functions will return NULL. */ | |
791 | ||
792 | tree | |
793 | oacc_get_fn_attrib (tree fn) | |
794 | { | |
795 | return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); | |
796 | } | |
797 | ||
c4b26cae | 798 | /* Return true if FN is an OpenMP or OpenACC offloading function. */ |
799 | ||
800 | bool | |
801 | offloading_function_p (tree fn) | |
802 | { | |
803 | tree attrs = DECL_ATTRIBUTES (fn); | |
804 | return (lookup_attribute ("omp declare target", attrs) | |
805 | || lookup_attribute ("omp target entrypoint", attrs)); | |
806 | } | |
807 | ||
4954efd4 | 808 | /* Extract an oacc execution dimension from FN. FN must be an |
809 | offloaded function or routine that has already had its execution | |
810 | dimensions lowered to the target-specific values. */ | |
811 | ||
812 | int | |
813 | oacc_get_fn_dim_size (tree fn, int axis) | |
814 | { | |
815 | tree attrs = oacc_get_fn_attrib (fn); | |
816 | ||
817 | gcc_assert (axis < GOMP_DIM_MAX); | |
818 | ||
819 | tree dims = TREE_VALUE (attrs); | |
820 | while (axis--) | |
821 | dims = TREE_CHAIN (dims); | |
822 | ||
823 | int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); | |
824 | ||
825 | return size; | |
826 | } | |
827 | ||
828 | /* Extract the dimension axis from an IFN_GOACC_DIM_POS or | |
829 | IFN_GOACC_DIM_SIZE call. */ | |
830 | ||
831 | int | |
832 | oacc_get_ifn_dim_arg (const gimple *stmt) | |
833 | { | |
834 | gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE | |
835 | || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); | |
836 | tree arg = gimple_call_arg (stmt, 0); | |
837 | HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); | |
838 | ||
839 | gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); | |
840 | return (int) axis; | |
841 | } |