Common Var(flag_omit_frame_pointer) Optimization
When possible do not generate stack frames.
+fopenmp-target-simd-clone
+Common Alias(fopenmp-target-simd-clone=,any,none)
+
+fopenmp-target-simd-clone=
+Common Joined RejectNegative Enum(target_simd_clone_device) Var(flag_openmp_target_simd_clone) Init(OMP_TARGET_SIMD_CLONE_NONE) Optimization
+Generate SIMD clones for functions with the OpenMP declare target directive.
+
+Enum
+Name(target_simd_clone_device) Type(int)
+
+EnumValue
+Enum(target_simd_clone_device) String(none) Value(OMP_TARGET_SIMD_CLONE_NONE)
+
+EnumValue
+Enum(target_simd_clone_device) String(host) Value(OMP_TARGET_SIMD_CLONE_HOST)
+
+EnumValue
+Enum(target_simd_clone_device) String(nohost) Value(OMP_TARGET_SIMD_CLONE_NOHOST)
+
+EnumValue
+Enum(target_simd_clone_device) String(any) Value(OMP_TARGET_SIMD_CLONE_ANY)
+
fopt-info
Common Var(flag_opt_info) Optimization
Enable all optimization info dumps on stderr.
static int
aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num)
+ tree base_type, int num,
+ bool explicit_p)
{
tree t, ret_type;
unsigned int elt_bits, count;
|| const_simdlen > 1024
|| (const_simdlen & (const_simdlen - 1)) != 0))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", const_simdlen);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", const_simdlen);
return 0;
}
if (TREE_CODE (ret_type) != VOID_TYPE
&& !currently_supported_simd_type (ret_type, base_type))
{
- if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
+ if (!explicit_p)
+ ;
+ else if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
&& !currently_supported_simd_type (arg_type, base_type))
{
- if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
+ if (!explicit_p)
+ ;
+ else if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->simdlen.is_constant (&const_simdlen)
&& maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "GCC does not currently support simdlen %wd for type %qT",
- const_simdlen, base_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "GCC does not currently support simdlen %wd for "
+ "type %qT",
+ const_simdlen, base_type);
return 0;
}
}
gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node),
struct cgraph_simd_clone *clonei,
tree ARG_UNUSED (base_type),
- int ARG_UNUSED (num))
+ int ARG_UNUSED (num),
+ bool explicit_p)
{
if (known_eq (clonei->simdlen, 0U))
clonei->simdlen = 64;
{
/* Note that x86 has a similar message that is likely to trigger on
sizes that are OK for gcn; the user can't win. */
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd (amdgcn)",
- clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd (amdgcn)",
+ clonei->simdlen.to_constant ());
return 0;
}
static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num)
+ tree base_type, int num,
+ bool explicit_p)
{
int ret = 1;
|| clonei->simdlen > 1024
|| (clonei->simdlen & (clonei->simdlen - 1)) != 0))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", clonei->simdlen.to_constant ());
return 0;
}
break;
/* FALLTHRU */
default:
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported return type %qT for simd", ret_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd", ret_type);
return 0;
}
default:
if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
break;
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported argument type %qT for simd", arg_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd", arg_type);
return 0;
}
}
- if (!TREE_PUBLIC (node->decl))
+ if (!TREE_PUBLIC (node->decl) || !explicit_p)
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
cnt /= clonei->vecsize_float;
if (cnt > (TARGET_64BIT ? 16 : 8))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd",
- clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd",
+ clonei->simdlen.to_constant ());
return 0;
}
}
-flax-vector-conversions -fms-extensions @gol
-foffload=@var{arg} -foffload-options=@var{arg} @gol
-fopenacc -fopenacc-dim=@var{geom} @gol
--fopenmp -fopenmp-simd @gol
+-fopenmp -fopenmp-simd -fopenmp-target-simd-clone@r{[}=@var{device-type}@r{]} @gol
-fpermitted-flt-eval-methods=@var{standard} @gol
-fplan9-extensions -fsigned-bitfields -funsigned-bitfields @gol
-fsigned-char -funsigned-char -fstrict-flex-arrays[=@var{n}] @gol
@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++
and @code{!$omp} in Fortran. Other OpenMP directives are ignored.
+@item -fopenmp-target-simd-clone
+@item -fopenmp-target-simd-clone=@var{device-type}
+@opindex fopenmp-target-simd-clone
+@cindex OpenMP target SIMD clone
+In addition to generating SIMD clones for functions marked with the
+@code{declare simd} directive, GCC also generates clones
+for functions marked with the OpenMP @code{declare target} directive
+that are suitable for vectorization when this option is in effect. The
+@var{device-type} may be one of @code{none}, @code{host}, @code{nohost},
+and @code{any}, which correspond to keywords for the @code{device_type}
+clause of the @code{declare target} directive; clones are generated for
+the intersection of devices specified.
+@option{-fopenmp-target-simd-clone} is equivalent to
+@option{-fopenmp-target-simd-clone=any} and
+@option{-fno-openmp-target-simd-clone} is equivalent to
+@option{-fopenmp-target-simd-clone=none}.
+
+At @option{-O2} and higher (but not @option{-Os} or @option{-Og}) this
+optimization defaults to @option{-fopenmp-target-simd-clone=nohost}; otherwise
+it is disabled by default.
+
@item -fpermitted-flt-eval-methods=@var{style}
@opindex fpermitted-flt-eval-methods
@opindex fpermitted-flt-eval-methods=c11
stores.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@var{simdlen} field if it was previously 0.
OPENACC_PRIVATIZATION_NOISY
};
+/* Targets for -fopenmp-target-simd-clone. */
+enum omp_target_simd_clone_device_kind
+{
+ OMP_TARGET_SIMD_CLONE_NONE = 0,
+ OMP_TARGET_SIMD_CLONE_HOST = 1,
+ OMP_TARGET_SIMD_CLONE_NOHOST = 2,
+ OMP_TARGET_SIMD_CLONE_ANY = 3
+};
+
#endif
#endif /* ! GCC_FLAG_TYPES_H */
#include "stringpool.h"
#include "attribs.h"
#include "omp-simd-clone.h"
+#include "omp-low.h"
+#include "omp-general.h"
+
+/* Print debug info for ok_for_auto_simd_clone to the dump file, logging
+ failure reason EXCUSE for function DECL. Always returns false. */
+static bool
+auto_simd_fail (tree decl, const char *excuse)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nNot auto-cloning %s because %s\n",
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
+ excuse);
+ return false;
+}
+
+/* Helper function for ok_for_auto_simd_clone; return false if the statement
+ violates restrictions for an "omp declare simd" function. Specifically,
+ the function must not
+ - throw or call setjmp/longjmp
+ - write memory that could alias parallel calls
+ - read volatile memory
+ - include openmp directives or calls
+ - call functions that might do those things */
+
+static bool
+auto_simd_check_stmt (gimple *stmt, tree outer)
+{
+ tree decl;
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_CALL:
+
+ /* Calls to functions that are CONST or PURE are ok, even if they
+ are internal functions without a decl. Reject other internal
+ functions. */
+ if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
+ break;
+ if (gimple_call_internal_p (stmt))
+ return auto_simd_fail (outer,
+ "body contains internal function call");
+
+ decl = gimple_call_fndecl (stmt);
+
+ /* We can't know whether indirect calls are safe. */
+ if (decl == NULL_TREE)
+ return auto_simd_fail (outer, "body contains indirect call");
+
+ /* Calls to functions that are already marked "omp declare simd" are
+ OK. */
+ if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl)))
+ break;
+
+ /* Let recursive calls to the current function through. */
+ if (decl == outer)
+ break;
+
+ /* Other function calls are not permitted. This covers all calls to
+ the libgomp API and setjmp/longjmp, too, as well as things like
+ __cxa_throw_ related to exception handling. */
+ return auto_simd_fail (outer, "body contains unsafe function call");
+
+ /* Reject EH-related constructs. Most of the EH gimple codes are
+ already lowered by the time this pass runs during IPA.
+ GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by
+ pass_lower_eh_dispatch and pass_lower_resx, respectively; those
+ passes run later. */
+ case GIMPLE_EH_DISPATCH:
+ case GIMPLE_RESX:
+ return auto_simd_fail (outer, "body contains EH constructs");
+
+ /* Asms are not permitted since we don't know what they do. */
+ case GIMPLE_ASM:
+ return auto_simd_fail (outer, "body contains inline asm");
+
+ default:
+ break;
+ }
+
+ /* Memory writes are not permitted.
+ FIXME: this could be relaxed a little to permit writes to
+ function-local variables that could not alias other instances
+ of the function running in parallel. */
+ if (gimple_store_p (stmt))
+ return auto_simd_fail (outer, "body includes memory write");
+
+ /* Volatile reads are not permitted. */
+ if (gimple_has_volatile_ops (stmt))
+ return auto_simd_fail (outer, "body includes volatile op");
+
+ /* Otherwise OK. */
+ return true;
+}
+
+/* Helper function for ok_for_auto_simd_clone: return true if type T is
+ plausible for a cloneable function argument or return type. */
+static bool
+plausible_type_for_simd_clone (tree t)
+{
+ if (TREE_CODE (t) == VOID_TYPE)
+ return true;
+ else if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t)))
+ /* Small record/union types may fit into a scalar mode, but are
+ still not suitable. */
+ return false;
+ else if (TYPE_ATOMIC (t))
+ /* Atomic types trigger warnings in simd_clone_clauses_extract. */
+ return false;
+ else
+ return true;
+}
+
+/* Check if the function NODE appears suitable for auto-annotation
+ with "declare simd". */
+
+static bool
+ok_for_auto_simd_clone (struct cgraph_node *node)
+{
+ tree decl = node->decl;
+ tree t;
+ basic_block bb;
+
+ /* Nothing to do if the function isn't a definition or doesn't
+ have a body. */
+ if (!node->definition || !node->has_gimple_body_p ())
+ return auto_simd_fail (decl, "no definition or body");
+
+ /* No point in trying to generate implicit clones if the function
+ isn't used in the compilation unit. */
+ if (!node->callers)
+ return auto_simd_fail (decl, "function is not used");
+
+ /* Nothing to do if the function already has the "omp declare simd"
+ attribute, is marked noclone, or is not "omp declare target". */
+ if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl))
+ || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl))
+ || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
+ return auto_simd_fail (decl, "incompatible attributes");
+
+ /* Check whether the function is restricted host/nohost via the
+ "omp declare target device_type" clause, and that doesn't match
+ what we're compiling for. Internally, these translate into
+ "omp declare target [no]host" attributes on the decl; "any"
+ translates into both attributes, but the default (which is supposed
+ to be equivalent to "any") is neither. */
+ tree host = lookup_attribute ("omp declare target host",
+ DECL_ATTRIBUTES (decl));
+ tree nohost = lookup_attribute ("omp declare target nohost",
+ DECL_ATTRIBUTES (decl));
+#ifdef ACCEL_COMPILER
+ if (host && !nohost)
+ return auto_simd_fail (decl, "device doesn't match for accel compiler");
+#else
+ if (nohost && !host)
+ return auto_simd_fail (decl, "device doesn't match for host compiler");
+#endif
+
+ /* Backends will check for vectorizable arguments/return types in a
+ target-specific way, but we can immediately filter out functions
+ that have implausible argument/return types. */
+ t = TREE_TYPE (TREE_TYPE (decl));
+ if (!plausible_type_for_simd_clone (t))
+ return auto_simd_fail (decl, "return type fails sniff test");
+
+ if (TYPE_ARG_TYPES (TREE_TYPE (decl)))
+ {
+ for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl));
+ temp; temp = TREE_CHAIN (temp))
+ {
+ t = TREE_VALUE (temp);
+ if (!plausible_type_for_simd_clone (t))
+ return auto_simd_fail (decl, "argument type fails sniff test");
+ }
+ }
+ else if (DECL_ARGUMENTS (decl))
+ {
+ for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp))
+ {
+ t = TREE_TYPE (temp);
+ if (!plausible_type_for_simd_clone (t))
+ return auto_simd_fail (decl, "argument type fails sniff test");
+ }
+ }
+ else
+ return auto_simd_fail (decl, "function has no arguments");
+
+ /* Scan the function body to see if it is suitable for SIMD-ization. */
+ node->get_body ();
+
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
+ {
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ if (!auto_simd_check_stmt (gsi_stmt (gsi), decl))
+ return false;
+ }
+
+ /* All is good. */
+ if (dump_file)
+ fprintf (dump_file, "\nMarking %s for auto-cloning\n",
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
+ return true;
+}
+
/* Return the number of elements in vector type VECTYPE, which is associated
with a SIMD clone. At present these always have a constant length. */
return get_identifier (str);
}
-/* Create a simd clone of OLD_NODE and return it. */
+/* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
+ create it as a local symbol, otherwise copy the symbol linkage and
+ visibility attributes from OLD_NODE. */
static struct cgraph_node *
-simd_clone_create (struct cgraph_node *old_node)
+simd_clone_create (struct cgraph_node *old_node, bool force_local)
{
struct cgraph_node *new_node;
if (old_node->definition)
return new_node;
set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
- TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
- DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
- DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
- DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
- DECL_VISIBILITY_SPECIFIED (new_node->decl)
- = DECL_VISIBILITY_SPECIFIED (old_node->decl);
- DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
- DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
- if (DECL_ONE_ONLY (old_node->decl))
- make_decl_one_only (new_node->decl, DECL_ASSEMBLER_NAME (new_node->decl));
-
- /* The method cgraph_version_clone_with_body () will force the new
- symbol local. Undo this, and inherit external visibility from
- the old node. */
- new_node->local = old_node->local;
- new_node->externally_visible = old_node->externally_visible;
- new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt;
+ if (force_local)
+ {
+ TREE_PUBLIC (new_node->decl) = 0;
+ DECL_COMDAT (new_node->decl) = 0;
+ DECL_WEAK (new_node->decl) = 0;
+ DECL_EXTERNAL (new_node->decl) = 0;
+ DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0;
+ DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT;
+ DECL_DLLIMPORT_P (new_node->decl) = 0;
+ }
+ else
+ {
+ TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+ DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
+ DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
+ DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
+ DECL_VISIBILITY_SPECIFIED (new_node->decl)
+ = DECL_VISIBILITY_SPECIFIED (old_node->decl);
+ DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
+ DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
+ if (DECL_ONE_ONLY (old_node->decl))
+ make_decl_one_only (new_node->decl,
+ DECL_ASSEMBLER_NAME (new_node->decl));
+
+ /* The method cgraph_version_clone_with_body () will force the new
+ symbol local. Undo this, and inherit external visibility from
+ the old node. */
+ new_node->local = old_node->local;
+ new_node->externally_visible = old_node->externally_visible;
+ new_node->calls_declare_variant_alt
+ = old_node->calls_declare_variant_alt;
+ }
return new_node;
}
void
expand_simd_clones (struct cgraph_node *node)
{
- tree attr = lookup_attribute ("omp declare simd",
- DECL_ATTRIBUTES (node->decl));
- if (attr == NULL_TREE
- || node->inlined_to
+ tree attr;
+ bool explicit_p = true;
+
+ if (node->inlined_to
|| lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
return;
+ attr = lookup_attribute ("omp declare simd",
+ DECL_ATTRIBUTES (node->decl));
+
+ /* See if we can add an "omp declare simd" directive implicitly
+ before giving up. */
+ /* FIXME: OpenACC "#pragma acc routine" translates into
+ "omp declare target", but appears also to have some other effects
+ that conflict with generating SIMD clones, causing ICEs. So don't
+ do this if we've got OpenACC instead of OpenMP. */
+ if (attr == NULL_TREE
+#ifdef ACCEL_COMPILER
+ && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
+ || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_NOHOST)
+#else
+ && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
+ || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_HOST)
+#endif
+ && !oacc_get_fn_attrib (node->decl)
+ && ok_for_auto_simd_clone (node))
+ {
+ attr = tree_cons (get_identifier ("omp declare simd"), NULL,
+ DECL_ATTRIBUTES (node->decl));
+ DECL_ATTRIBUTES (node->decl) = attr;
+ explicit_p = false;
+ }
+
+ if (attr == NULL_TREE)
+ return;
+
/* Ignore
#pragma omp declare simd
extern int foo ();
poly_uint64 orig_simdlen = clone_info->simdlen;
tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+
/* The target can return 0 (no simd clones should be created),
1 (just one ISA of simd clones should be created) or higher
count of ISA variants. In that case, clone_info is initialized
for the first ISA variant. */
int count
= targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
- base_type, 0);
+ base_type, 0,
+ explicit_p);
if (count == 0)
continue;
/* And call the target hook again to get the right ISA. */
targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
base_type,
- i / 2);
+ i / 2,
+ explicit_p);
if ((i & 1) != 0)
clone->inbranch = 1;
}
/* Only when we are sure we want to create the clone actually
clone the function (or definitions) or create another
extern FUNCTION_DECL (for prototypes without definitions). */
- struct cgraph_node *n = simd_clone_create (node);
+ struct cgraph_node *n = simd_clone_create (node, !explicit_p);
if (n == NULL)
{
if (i == 0)
simd_clone_adjust_return_type (n);
simd_clone_adjust_argument_types (n);
}
+ if (dump_file)
+ fprintf (dump_file, "\nGenerated %s clone %s\n",
+ (TREE_PUBLIC (n->decl) ? "global" : "local"),
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)));
}
}
while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
REORDER_BLOCKS_ALGORITHM_STC },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_slp_vectorize, NULL, 1 },
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fopenmp_target_simd_clone_, NULL,
+ OMP_TARGET_SIMD_CLONE_NOHOST },
#ifdef INSN_SCHEDULING
/* Only run the pre-regalloc scheduling pass if optimizing for speed. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fschedule_insns, NULL, 1 },
not determined by the bitsize (in which case @var{simdlen} is always used).\n\
The hook should return 0 if SIMD clones shouldn't be emitted,\n\
or number of @var{vecsize_mangle} variants that should be emitted.",
-int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
+int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int, bool), NULL)
DEFHOOK
(adjust,
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are generated for functions with "declare target". */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* Although addit has external linkage, we expect clones to be generated as
+ for a function with internal linkage. */
+
+/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*N.*__Z5additii" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*M.*__Z5additii" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for "declare target"
+ functions that throw. */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ if (a < 0) throw -1;
+ return a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are generated for functions with "declare target". */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* Although addit has external linkage, we expect clones to be generated as
+ for a function with internal linkage. */
+
+/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*N.*_addit" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*M.*_addit" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fdump-ipa-simdclone-details" } */
+
+/* Test that host simd clones are not generated for functions with
+ "declare target" by default at -O2. */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that host simd clones are not generated for functions with the nohost
+ "declare target" clause. */
+
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp declare target to(addit) device_type(nohost)
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump "device doesn't match" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for functions with
+ "declare target" but that write memory in the body. */
+
+extern int save;
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit(int a, int b)
+{
+ save = a;
+ return a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump "body includes memory write" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for functions with
+ "declare target" but unsuitable arguments. */
+
+struct s {
+ int a;
+ int b;
+};
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (struct s x)
+{
+ return x.a + x.b;
+}
+#pragma omp end declare target
+
+void callit (struct s *ss, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (ss[i]);
+}
+
+/* { dg-final { scan-ipa-dump "argument type fails sniff test" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for functions with
+ "declare target" but that call possibly side-effecting functions
+ in the body. */
+
+extern int f (int);
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit(int a, int b)
+{
+ return f(a) + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump "body contains unsafe function call" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
+
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for functions with
+ "declare target" that have no callers in the same compilation unit. */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-ipa-dump "function is not used" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones are not generated for functions with
+ "declare target" but that read volatile memory in the body. */
+
+extern volatile int save;
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit(int a, int b)
+{
+ return save + a + b;
+}
+#pragma omp end declare target
+
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+
+/* { dg-final { scan-ipa-dump "body includes volatile op" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */
--- /dev/null
+# Copyright (C) 2018-2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Various utilities for scanning offloading ipa dump output, used by
+# libgomp.exp.
+
+load_lib scandump.exp
+load_lib scanoffload.exp
+
+# Utility for scanning compiler result, invoked via dg-final.
+# Call pass if pattern is present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped ipa pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-ipa-dump { args } {
+
+ if { [llength $args] < 2 } {
+ error "scan-offload-ipa-dump: too few arguments"
+ return
+ }
+ if { [llength $args] > 3 } {
+ error "scan-offload-ipa-dump: too many arguments"
+ return
+ }
+ if { [llength $args] >= 3 } {
+ scoff end-1 scan-dump "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" "" \
+ [lindex $args 2]
+ } else {
+ scoff end scan-dump "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" ""
+ }
+}
+
+# Call pass if pattern is present given number of times, otherwise fail.
+# Argument 0 is the regexp to match
+# Argument 1 is number of times the regexp must be found
+# Argument 2 is the name of the dumped ipa pass
+# Argument 3 handles expected failures and the like
+proc scan-offload-ipa-dump-times { args } {
+
+ if { [llength $args] < 3 } {
+ error "scan-offload-ipa-dump-times: too few arguments"
+ return
+ }
+ if { [llength $args] > 4 } {
+ error "scan-offload-ipa-dump-times: too many arguments"
+ return
+ }
+ if { [llength $args] >= 4 } {
+ scoff end-1 scan-dump-times "offload-ipa" [lindex $args 0] \
+ [lindex $args 1] "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 2]" "" \
+ [lindex $args 3]
+ } else {
+ scoff end scan-dump-times "offload-ipa" [lindex $args 0] \
+ [lindex $args 1] "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 2]" ""
+ }
+}
+
+# Call pass if pattern is not present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped ipa pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-ipa-dump-not { args } {
+
+ if { [llength $args] < 2 } {
+ error "scan-offload-ipa-dump-not: too few arguments"
+ return
+ }
+ if { [llength $args] > 3 } {
+ error "scan-offload-ipa-dump-not: too many arguments"
+ return
+ }
+ if { [llength $args] >= 3 } {
+ scoff end-1 scan-dump-not "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" "" \
+ [lindex $args 2]
+ } else {
+ scoff end scan-dump-not "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" ""
+ }
+}
+
+# Utility for scanning demangled compiler result, invoked via dg-final.
+# Call pass if pattern is present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped ipa pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-ipa-dump-dem { args } {
+
+ if { [llength $args] < 2 } {
+ error "scan-offload-ipa-dump-dem: too few arguments"
+ return
+ }
+ if { [llength $args] > 3 } {
+ error "scan-offload-ipa-dump-dem: too many arguments"
+ return
+ }
+ if { [llength $args] >= 3 } {
+ scoff end-1 scan-dump-dem "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" "" \
+ [lindex $args 2]
+ } else {
+ scoff end scan-dump-dem "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" ""
+ }
+}
+
+# Call pass if demangled pattern is not present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped ipa pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-ipa-dump-dem-not { args } {
+
+ if { [llength $args] < 2 } {
+ error "scan-offload-ipa-dump-dem-not: too few arguments"
+ return
+ }
+ if { [llength $args] > 3 } {
+ error "scan-offload-ipa-dump-dem-not: too many arguments"
+ return
+ }
+ if { [llength $args] >= 3 } {
+ scoff end-1 scan-dump-dem-not "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" "" \
+ [lindex $args 2]
+ } else {
+ scoff end scan-dump-dem-not "offload-ipa" [lindex $args 0] \
+ "\[0-9\]\[0-9\]\[0-9]i.[lindex $args 1]" ""
+ }
+}
load_gcc_lib scantree.exp
load_gcc_lib scanltranstree.exp
load_gcc_lib scanoffload.exp
+load_gcc_lib scanoffloadipa.exp
load_gcc_lib scanoffloadtree.exp
load_gcc_lib scanoffloadrtl.exp
load_gcc_lib scanipa.exp
--- /dev/null
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options "-O2 -foffload-options=-fdump-ipa-simdclone-details" } */
+
+/* Test that simd clones for the offload processor are generated for
+ functions with "declare target" when enabled by default at -O2. */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+
+__attribute__ ((__noinline__))
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int aa[16], bb[16], cc[16];
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ aa[i] = i;
+ bb[i] = -i;
+ }
+ callit (aa, bb, cc);
+ for (i = 0; i < 16; i++)
+ if (cc[i] != 0)
+ return 1;
+ return 0;
+}
+
+/* Although addit has external linkage, we expect clones to be generated as
+ for a function with internal linkage. */
+
+/* { dg-final { scan-offload-ipa-dump "Generated local clone _ZGV.*N.*_addit" "simdclone" } } */
+/* { dg-final { scan-offload-ipa-dump "Generated local clone _ZGV.*M.*_addit" "simdclone" } } */
--- /dev/null
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options "-foffload-options=-fdump-ipa-simdclone-details -foffload-options=-fno-openmp-target-simd-clone" } */
+
+/* Test that simd clones for the offload processor are not generated for
+ functions with "declare target" when explicitly disabled. */
+
+#pragma omp declare target
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+
+__attribute__ ((__noinline__))
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int aa[16], bb[16], cc[16];
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ aa[i] = i;
+ bb[i] = -i;
+ }
+ callit (aa, bb, cc);
+ for (i = 0; i < 16; i++)
+ if (cc[i] != 0)
+ return 1;
+ return 0;
+}
+
+/* { dg-final { scan-offload-ipa-dump-not "Generated .* clone" "simdclone" } } */
--- /dev/null
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options "-O2 -foffload-options=-fdump-ipa-simdclone-details" } */
+
+/* Test that device simd clones are not generated for functions with the host
+ "declare target" clause only. */
+
+__attribute__ ((__noinline__)) int addit (int a, int b)
+{
+ return a + b;
+}
+#pragma omp declare target to(addit) device_type(host)
+
+#pragma omp declare target
+void callit (int *a, int *b, int *c)
+{
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 16; i++)
+ c[i] = addit (a[i], b[i]);
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int aa[16], bb[16], cc[16];
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ aa[i] = i;
+ bb[i] = -i;
+ }
+ callit (aa, bb, cc);
+ for (i = 0; i < 16; i++)
+ if (cc[i] != 0)
+ return 1;
+ return 0;
+}
+
+/* { dg-final { scan-offload-ipa-dump "device doesn't match" "simdclone" { target x86_64-*-* } } } */
+/* { dg-final { scan-offload-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */