/* General types and functions that are useful for processing of OpenMP,
 OpenACC and similar directives at various stages of compilation.
- Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Copyright (C) 2005-2019 Free Software Foundation, Inc.
This file is part of GCC.
#include "fold-const.h"
#include "langhooks.h"
#include "omp-general.h"
+#include "stringpool.h"
+#include "attribs.h"
+enum omp_requires omp_requires_mask;
tree
omp_find_clause (tree clauses, enum omp_clause_code kind)
return NULL_TREE;
}
+/* True if OpenMP should regard this DECL as being a scalar which has Fortran's
+ allocatable or pointer attribute. */
+bool
+omp_is_allocatable_or_ptr (tree decl)
+{
+  /* This is a front-end property, so defer to the language hook.  */
+ return lang_hooks.decls.omp_is_allocatable_or_ptr (decl);
+}
+
+/* Return true if DECL is a Fortran optional argument. */
+
+bool
+omp_is_optional_argument (tree decl)
+{
+  /* Only the front end knows this; answered via its langhook.  */
+ return lang_hooks.decls.omp_is_optional_argument (decl);
+}
+
/* Return true if DECL is a reference type. */
bool
return lang_hooks.decls.omp_privatize_by_reference (decl);
}
-/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
- GT_EXPR. */
+/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR,
+ given that V is the loop index variable and STEP is loop step. */
void
-omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
+omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2,
+ tree v, tree step)
{
switch (*cond_code)
{
case LT_EXPR:
case GT_EXPR:
+ break;
+
case NE_EXPR:
+ gcc_assert (TREE_CODE (step) == INTEGER_CST);
+ if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE)
+ {
+ if (integer_onep (step))
+ *cond_code = LT_EXPR;
+ else
+ {
+ gcc_assert (integer_minus_onep (step));
+ *cond_code = GT_EXPR;
+ }
+ }
+ else
+ {
+ tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v)));
+ gcc_assert (TREE_CODE (unit) == INTEGER_CST);
+ if (tree_int_cst_equal (unit, step))
+ *cond_code = LT_EXPR;
+ else
+ {
+ gcc_assert (wi::neg (wi::to_widest (unit))
+ == wi::to_widest (step));
+ *cond_code = GT_EXPR;
+ }
+ }
+
break;
+
case LE_EXPR:
if (POINTER_TYPE_P (TREE_TYPE (*n2)))
*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
int i;
struct omp_for_data_loop dummy_loop;
location_t loc = gimple_location (for_stmt);
- bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
+ bool simd = gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_SIMD;
bool distribute = gimple_omp_for_kind (for_stmt)
== GF_OMP_FOR_KIND_DISTRIBUTE;
bool taskloop = gimple_omp_for_kind (for_stmt)
fd->for_stmt = for_stmt;
fd->pre = NULL;
- if (gimple_omp_for_collapse (for_stmt) > 1)
- fd->loops = loops;
- else
- fd->loops = &fd->loop;
-
fd->have_nowait = distribute || simd;
fd->have_ordered = false;
+ fd->have_reductemp = false;
+ fd->have_pointer_condtemp = false;
+ fd->have_scantemp = false;
+ fd->have_nonctrl_scantemp = false;
+ fd->lastprivate_conditional = 0;
+ fd->tiling = NULL_TREE;
fd->collapse = 1;
fd->ordered = 0;
fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
fd->sched_modifiers = 0;
fd->chunk_size = NULL_TREE;
fd->simd_schedule = false;
- if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
- fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
collapse_iter = NULL;
collapse_count = NULL;
collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
}
break;
+ case OMP_CLAUSE_TILE:
+ fd->tiling = OMP_CLAUSE_TILE_LIST (t);
+ fd->collapse = list_length (fd->tiling);
+ gcc_assert (fd->collapse);
+ collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t);
+ collapse_count = &OMP_CLAUSE_TILE_COUNT (t);
+ break;
+ case OMP_CLAUSE__REDUCTEMP_:
+ fd->have_reductemp = true;
+ break;
+ case OMP_CLAUSE_LASTPRIVATE:
+ if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (t))
+ fd->lastprivate_conditional++;
+ break;
+ case OMP_CLAUSE__CONDTEMP_:
+ if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
+ fd->have_pointer_condtemp = true;
+ break;
+ case OMP_CLAUSE__SCANTEMP_:
+ fd->have_scantemp = true;
+ if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
+ && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
+ fd->have_nonctrl_scantemp = true;
+ break;
default:
break;
}
+
+ if (fd->collapse > 1 || fd->tiling)
+ fd->loops = loops;
+ else
+ fd->loops = &fd->loop;
+
if (fd->ordered && fd->collapse == 1 && loops != NULL)
{
fd->loops = loops;
fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
gcc_assert (fd->chunk_size == NULL);
}
- gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
+ gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL);
if (taskloop)
fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
int cnt = fd->ordered ? fd->ordered : fd->collapse;
for (i = 0; i < cnt; i++)
{
- if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
+ if (i == 0
+ && fd->collapse == 1
+ && !fd->tiling
+ && (fd->ordered == 0 || loops == NULL))
loop = &fd->loop;
else if (loops != NULL)
loop = loops + i;
loop->cond_code = gimple_omp_for_cond (for_stmt, i);
loop->n2 = gimple_omp_for_final (for_stmt, i);
gcc_assert (loop->cond_code != NE_EXPR
- || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD
- || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR);
- omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2);
+ || (gimple_omp_for_kind (for_stmt)
+ != GF_OMP_FOR_KIND_OACC_LOOP));
t = gimple_omp_for_incr (for_stmt, i);
gcc_assert (TREE_OPERAND (t, 0) == var);
loop->step = omp_get_for_step_from_incr (loc, t);
+ omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2, loop->v,
+ loop->step);
+
if (simd
|| (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
&& !fd->have_ordered))
{
- if (fd->collapse == 1)
+ if (fd->collapse == 1 && !fd->tiling)
iter_type = TREE_TYPE (loop->v);
else if (i == 0
|| TYPE_PRECISION (iter_type)
tree n;
if (loop->cond_code == LT_EXPR)
- n = fold_build2_loc (loc,
- PLUS_EXPR, TREE_TYPE (loop->v),
- loop->n2, loop->step);
+ n = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
else
n = loop->n1;
if (TREE_CODE (n) != INTEGER_CST
if (loop->cond_code == LT_EXPR)
{
n1 = loop->n1;
- n2 = fold_build2_loc (loc,
- PLUS_EXPR, TREE_TYPE (loop->v),
- loop->n2, loop->step);
+ n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
}
else
{
- n1 = fold_build2_loc (loc,
- MINUS_EXPR, TREE_TYPE (loop->v),
- loop->n2, loop->step);
+ n1 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
n2 = loop->n1;
}
if (TREE_CODE (n1) != INTEGER_CST
if (POINTER_TYPE_P (itype))
itype = signed_type_for (itype);
t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
- t = fold_build2_loc (loc,
- PLUS_EXPR, itype,
- fold_convert_loc (loc, itype, loop->step), t);
+ t = fold_build2_loc (loc, PLUS_EXPR, itype,
+ fold_convert_loc (loc, itype, loop->step),
+ t);
t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
- fold_convert_loc (loc, itype, loop->n2));
+ fold_convert_loc (loc, itype, loop->n2));
t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
- fold_convert_loc (loc, itype, loop->n1));
+ fold_convert_loc (loc, itype, loop->n1));
if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
- t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
- fold_build1_loc (loc, NEGATE_EXPR, itype, t),
- fold_build1_loc (loc, NEGATE_EXPR, itype,
- fold_convert_loc (loc, itype,
- loop->step)));
+ {
+ tree step = fold_convert_loc (loc, itype, loop->step);
+ t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
+ fold_build1_loc (loc, NEGATE_EXPR,
+ itype, t),
+ fold_build1_loc (loc, NEGATE_EXPR,
+ itype, step));
+ }
else
t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
- fold_convert_loc (loc, itype, loop->step));
+ fold_convert_loc (loc, itype,
+ loop->step));
t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
if (count != NULL_TREE)
- count = fold_build2_loc (loc,
- MULT_EXPR, long_long_unsigned_type_node,
- count, t);
+ count = fold_build2_loc (loc, MULT_EXPR,
+ long_long_unsigned_type_node,
+ count, t);
else
count = t;
if (TREE_CODE (count) != INTEGER_CST)
*collapse_count = create_tmp_var (iter_type, ".count");
}
- if (fd->collapse > 1 || (fd->ordered && loops))
+ if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops))
{
fd->loop.v = *collapse_iter;
fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
/* Return maximum possible vectorization factor for the target. */
-int
+poly_uint64
omp_max_vf (void)
{
if (!optimize
|| optimize_debug
|| !flag_tree_loop_optimize
|| (!flag_tree_loop_vectorize
- && (global_options_set.x_flag_tree_loop_vectorize
- || global_options_set.x_flag_tree_vectorize)))
+ && global_options_set.x_flag_tree_loop_vectorize))
return 1;
- int vf = 1;
- int vs = targetm.vectorize.autovectorize_vector_sizes ();
- if (vs)
- vf = 1 << floor_log2 (vs);
- else
+  /* Prefer the sizes the target advertises for autovectorization and take
+     their maximum.  */
+ auto_vector_sizes sizes;
+ targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
+ if (!sizes.is_empty ())
{
- machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
- if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
- vf = GET_MODE_NUNITS (vqimode);
+ poly_uint64 vf = 0;
+ for (unsigned int i = 0; i < sizes.length (); ++i)
+ vf = ordered_max (vf, sizes[i]);
+ return vf;
}
- return vf;
+
+  /* No advertised sizes: fall back to the element count of the preferred
+     SIMD mode for QImode, if that is an integer vector mode.  */
+ machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
+ if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
+ return GET_MODE_NUNITS (vqimode);
+
+ return 1;
}
/* Return maximum SIMT width if offloading may target SIMT hardware. */
if (!optimize)
return 0;
if (ENABLE_OFFLOADING)
- for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; )
+ for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
{
if (!strncmp (c, "nvptx", strlen ("nvptx")))
return 32;
represented as a list of INTEGER_CST. Those that are runtime
exprs are represented as an INTEGER_CST of zero.
- TOOO. Normally the attribute will just contain a single such list. If
+ TODO: Normally the attribute will just contain a single such list. If
however it contains a list of lists, this will represent the use of
device_type. Each member of the outer list is an assoc list of
dimensions, keyed by the device type. The first entry will be the
/* Replace any existing oacc fn attribute with updated dimensions. */
-void
-oacc_replace_fn_attrib (tree fn, tree dims)
+/* Variant working on a list of attributes. */
+
+tree
+oacc_replace_fn_attrib_attr (tree attribs, tree dims)
{
tree ident = get_identifier (OACC_FN_ATTRIB);
- tree attribs = DECL_ATTRIBUTES (fn);
/* If we happen to be present as the first attrib, drop it. */
if (attribs && TREE_PURPOSE (attribs) == ident)
attribs = TREE_CHAIN (attribs);
+  /* Prepend a fresh OACC_FN_ATTRIB carrying DIMS and return the new list.  */
- DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
+ return tree_cons (ident, dims, attribs);
+}
+
+/* Variant working on a function decl. */
+
+void
+oacc_replace_fn_attrib (tree fn, tree dims)
+{
+  /* Rebuild FN's attribute list with the updated dimensions.  */
+ DECL_ATTRIBUTES (fn)
+ = oacc_replace_fn_attrib_attr (DECL_ATTRIBUTES (fn), dims);
}
/* Scan CLAUSES for launch dimensions and attach them to the oacc
function attribute. Push any that are non-constant onto the ARGS
- list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
- true, if these are for a kernels region offload function. */
+ list, along with an appropriate GOMP_LAUNCH_DIM tag. */
void
-oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
+oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args)
{
/* Must match GOMP_DIM ordering. */
static const omp_clause_code ids[]
non_const |= GOMP_DIM_MASK (ix);
}
attr = tree_cons (NULL_TREE, dim, attr);
- /* Note kernelness with TREE_PUBLIC. */
- if (is_kernel)
- TREE_PUBLIC (attr) = 1;
}
oacc_replace_fn_attrib (fn, attr);
}
}
-/* Process the routine's dimension clauess to generate an attribute
- value. Issue diagnostics as appropriate. We default to SEQ
- (OpenACC 2.5 clarifies this). All dimensions have a size of zero
+/* Verify OpenACC routine clauses.
+
+ Returns 0 if FNDECL should be marked with an OpenACC 'routine' directive, 1
+ if it has already been marked in compatible way, and -1 if incompatible.
+ Upon returning, the chain of clauses will contain exactly one clause
+ specifying the level of parallelism. */
+
+int
+oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc,
+ const char *routine_str)
+{
+ tree c_level = NULL_TREE;
+ tree c_p = NULL_TREE;
+  /* C_P trails one clause behind C so an offending clause can be unlinked
+     from the chain in place.  */
+ for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c))
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_GANG:
+ case OMP_CLAUSE_WORKER:
+ case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_SEQ:
+ if (c_level == NULL_TREE)
+ c_level = c;
+ else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_CODE (c_level))
+ {
+ /* This has already been diagnosed in the front ends. */
+ /* Drop the duplicate clause. */
+ gcc_checking_assert (c_p != NULL_TREE)
+ OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c);
+ c = c_p;
+ }
+ else
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "%qs specifies a conflicting level of parallelism",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
+ inform (OMP_CLAUSE_LOCATION (c_level),
+ "... to the previous %qs clause here",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c_level)]);
+ /* Drop the conflicting clause. */
+ gcc_checking_assert (c_p != NULL_TREE);
+ OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c);
+ c = c_p;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (c_level == NULL_TREE)
+ {
+ /* Default to an implicit 'seq' clause. */
+ c_level = build_omp_clause (loc, OMP_CLAUSE_SEQ);
+ OMP_CLAUSE_CHAIN (c_level) = *clauses;
+ *clauses = c_level;
+ }
+ /* In *clauses, we now have exactly one clause specifying the level of
+ parallelism. */
+
+ tree attr
+ = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl));
+ if (attr != NULL_TREE)
+ {
+ /* If a "#pragma acc routine" has already been applied, just verify
+ this one for compatibility. */
+ /* Collect previous directive's clauses. */
+ tree c_level_p = NULL_TREE;
+ for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c))
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_GANG:
+ case OMP_CLAUSE_WORKER:
+ case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_SEQ:
+ gcc_checking_assert (c_level_p == NULL_TREE);
+ c_level_p = c;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_checking_assert (c_level_p != NULL_TREE);
+ /* ..., and compare to current directive's, which we've already collected
+ above. */
+ tree c_diag;
+ tree c_diag_p;
+ /* Matching level of parallelism? */
+ if (OMP_CLAUSE_CODE (c_level) != OMP_CLAUSE_CODE (c_level_p))
+ {
+ c_diag = c_level;
+ c_diag_p = c_level_p;
+ goto incompatible;
+ }
+ /* Compatible. */
+ return 1;
+
+ incompatible:
+ if (c_diag != NULL_TREE)
+ error_at (OMP_CLAUSE_LOCATION (c_diag),
+ "incompatible %qs clause when applying"
+ " %<%s%> to %qD, which has already been"
+ " marked with an OpenACC 'routine' directive",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)],
+ routine_str, fndecl);
+ else if (c_diag_p != NULL_TREE)
+ error_at (loc,
+ "missing %qs clause when applying"
+ " %<%s%> to %qD, which has already been"
+ " marked with an OpenACC 'routine' directive",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)],
+ routine_str, fndecl);
+ else
+ gcc_unreachable ();
+ if (c_diag_p != NULL_TREE)
+ inform (OMP_CLAUSE_LOCATION (c_diag_p),
+ "... with %qs clause here",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)]);
+ else
+ {
+ /* In the front ends, we don't preserve location information for the
+ OpenACC routine directive itself. However, that of c_level_p
+ should be close. */
+ location_t loc_routine = OMP_CLAUSE_LOCATION (c_level_p);
+ inform (loc_routine, "... without %qs clause near to here",
+ omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)]);
+ }
+ /* Incompatible. */
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Process the OpenACC 'routine' directive clauses to generate an attribute
+ for the level of parallelism. All dimensions have a size of zero
(dynamic). TREE_PURPOSE is set to indicate whether that dimension
can have a loop partitioned on it. non-zero indicates
yes, zero indicates no. By construction once a non-zero has been
oacc_build_routine_dims (tree clauses)
{
/* Must match GOMP_DIM ordering. */
- static const omp_clause_code ids[] =
- {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
+ static const omp_clause_code ids[]
+ = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
int ix;
int level = -1;
for (ix = GOMP_DIM_MAX + 1; ix--;)
if (OMP_CLAUSE_CODE (clauses) == ids[ix])
{
- if (level >= 0)
- error_at (OMP_CLAUSE_LOCATION (clauses),
- "multiple loop axes specified for routine");
level = ix;
break;
}
-
- /* Default to SEQ. */
- if (level < 0)
- level = GOMP_DIM_MAX;
+ gcc_checking_assert (level >= 0);
tree dims = NULL_TREE;
return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
}
-/* Return true if this oacc fn attrib is for a kernels offload
- region. We use the TREE_PUBLIC flag of each dimension -- only
- need to check the first one. */
+/* Return true if FN is an OpenMP or OpenACC offloading function. */
bool
-oacc_fn_attrib_kernels_p (tree attr)
+offloading_function_p (tree fn)
{
- return TREE_PUBLIC (TREE_VALUE (attr));
+ tree attrs = DECL_ATTRIBUTES (fn);
+  /* FN qualifies if it carries either of these two attributes.  */
+ return (lookup_attribute ("omp declare target", attrs)
+ || lookup_attribute ("omp target entrypoint", attrs));
}
/* Extract an oacc execution dimension from FN. FN must be an