Common Var(flag_omit_frame_pointer) Optimization
When possible do not generate stack frames.
+fopenmp-target-simd-clone
+Common Var(flag_openmp_target_simd_clone) Optimization
+Generate SIMD clones for functions with the OpenMP declare target directive.
+
fopt-info
Common Var(flag_opt_info) Optimization
Enable all optimization info dumps on stderr.
static int
aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num)
+ tree base_type, int num,
+ bool explicit_p)
{
tree t, ret_type;
unsigned int elt_bits, count;
|| const_simdlen > 1024
|| (const_simdlen & (const_simdlen - 1)) != 0))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", const_simdlen);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", const_simdlen);
return 0;
}
if (TREE_CODE (ret_type) != VOID_TYPE
&& !currently_supported_simd_type (ret_type, base_type))
{
- if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
+ if (!explicit_p)
+ ;
+ else if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
&& !currently_supported_simd_type (arg_type, base_type))
{
- if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
+ if (!explicit_p)
+ ;
+ else if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->simdlen.is_constant (&const_simdlen)
&& maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "GCC does not currently support simdlen %wd for type %qT",
- const_simdlen, base_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "GCC does not currently support simdlen %wd for "
+ "type %qT",
+ const_simdlen, base_type);
return 0;
}
}
gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node),
struct cgraph_simd_clone *clonei,
tree base_type,
- int ARG_UNUSED (num))
+ int ARG_UNUSED (num),
+ bool explicit_p)
{
unsigned int elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
{
/* Note that x86 has a similar message that is likely to trigger on
sizes that are OK for gcn; the user can't win. */
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd (amdgcn)",
- clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd (amdgcn)",
+ clonei->simdlen.to_constant ());
return 0;
}
static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num)
+ tree base_type, int num,
+ bool explicit_p)
{
int ret = 1;
|| clonei->simdlen > 1024
|| (clonei->simdlen & (clonei->simdlen - 1)) != 0))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", clonei->simdlen.to_constant ());
return 0;
}
break;
/* FALLTHRU */
default:
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported return type %qT for simd", ret_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd", ret_type);
return 0;
}
default:
if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
break;
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported argument type %qT for simd", arg_type);
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd", arg_type);
return 0;
}
}
- if (!TREE_PUBLIC (node->decl))
+ if (!TREE_PUBLIC (node->decl) || !explicit_p)
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
cnt /= clonei->vecsize_float;
if (cnt > (TARGET_64BIT ? 16 : 8))
{
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd",
- clonei->simdlen.to_constant ());
+ if (explicit_p)
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd",
+ clonei->simdlen.to_constant ());
return 0;
}
}
-flax-vector-conversions -fms-extensions @gol
-foffload=@var{arg} -foffload-options=@var{arg} -foffload-memory=@var{arg} @gol
-fopenacc -fopenacc-dim=@var{geom} @gol
--fopenmp -fopenmp-simd @gol
+-fopenmp -fopenmp-simd -fopenmp-target-simd-clone @gol
-fpermitted-flt-eval-methods=@var{standard} @gol
-fplan9-extensions -fsigned-bitfields -funsigned-bitfields @gol
-fsigned-char -funsigned-char -fsso-struct=@var{endianness}}
in C/C++ and @code{!$omp} in Fortran. Other OpenMP directives
are ignored.
+@item -fopenmp-target-simd-clone
+@opindex fopenmp-target-simd-clone
+@cindex OpenMP target SIMD clone
+In addition to generating SIMD clones for functions marked with the
+@code{declare simd} directive, when this option is in effect GCC also
+generates SIMD clones for functions marked with the OpenMP
+@code{declare target} directive, provided that those functions are
+suitable for vectorization.
+This option is enabled by default at @option{-O2} and higher (but not
+at @option{-Os} or @option{-Og}).
+
@item -fpermitted-flt-eval-methods=@var{style}
@opindex fpermitted-flt-eval-methods
@opindex fpermitted-flt-eval-methods=c11
this target than a sequence of elementwise loads or stores.
@end deftypevr
-@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@var{simdlen} field if it was previously 0.
#include "stringpool.h"
#include "attribs.h"
#include "omp-simd-clone.h"
+#include "omp-low.h"
+#include "omp-general.h"
+
+/* Subroutine of mark_auto_simd_clone.  Return true if STMT, a statement
+   in the body of the function whose decl is OUTER, is acceptable in a
+   function that is implicitly given the "omp declare simd" attribute,
+   and false otherwise.  The following are rejected:
+   - indirect calls, and direct calls to anything other than a const or
+     pure function, a function that already has "omp declare simd", or
+     OUTER itself
+   - OpenMP directives
+   - exception-handling constructs
+   - asms
+   - stores to memory.  */
+
+static bool
+auto_simd_check_stmt (gimple *stmt, tree outer)
+{
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_CALL:
+      {
+        tree callee = gimple_call_fndecl (stmt);
+
+        /* With no known callee (an indirect call) nothing can be said
+           about what the call does.  */
+        if (callee == NULL_TREE)
+          return false;
+
+        /* A direct call is tolerated only if it is const or pure, if its
+           callee is already marked "omp declare simd", or if it is a
+           recursive call to the function being examined.  */
+        if (!((gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
+              || lookup_attribute ("omp declare simd",
+                                   DECL_ATTRIBUTES (callee))
+              || callee == outer))
+          return false;
+        break;
+      }
+
+    /* OpenMP directives are not allowed; all EH-related constructs are
+       conservatively refused; asms are refused because their effect is
+       unknown.  */
+    CASE_GIMPLE_OMP:
+    case GIMPLE_CATCH:
+    case GIMPLE_EH_FILTER:
+    case GIMPLE_EH_MUST_NOT_THROW:
+    case GIMPLE_EH_ELSE:
+    case GIMPLE_EH_DISPATCH:
+    case GIMPLE_RESX:
+    case GIMPLE_TRY:
+    case GIMPLE_ASM:
+      return false;
+
+    default:
+      break;
+    }
+
+  /* Whatever survived the checks above must still not write memory.
+     FIXME: this could be relaxed a little to permit writes to
+     function-local variables that could not alias other instances
+     of the function running in parallel.  */
+  return !gimple_store_p (stmt);
+}
+
+/* Return true if T is plausible as a return or argument type of a
+   function that is implicitly given the "omp declare simd" attribute.
+   Backends check for vectorizable types in a target-specific way; this
+   is only an early filter that keeps void and scalar types and drops
+   everything else.  */
+
+static bool
+auto_simd_type_ok_p (tree t)
+{
+  if (VOID_TYPE_P (t))
+    return true;
+
+  /* Small record/union types may be given a scalar machine mode, but
+     they are still not scalar types, so test for them explicitly rather
+     than relying on the mode alone.  */
+  if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t)))
+    return false;
+
+  /* Atomic types trigger warnings in simd_clone_clauses_extract.  */
+  return !TYPE_ATOMIC (t);
+}
+
+/* If the function NODE appears suitable for auto-annotation with "declare
+   simd", add such an attribute to its decl and return the new head of the
+   decl's attribute list; otherwise leave the decl alone and return
+   NULL_TREE.  NODE is suitable when it is a definition with a gimple
+   body, is "omp declare target" but neither "omp declare simd" nor
+   "noclone", has only plausible return and argument types, and every
+   statement in its body passes auto_simd_check_stmt.  */
+
+static tree
+mark_auto_simd_clone (struct cgraph_node *node)
+{
+  tree decl = node->decl;
+  basic_block bb;
+
+  /* Nothing to do if the function isn't a definition or doesn't
+     have a body.  */
+  if (!node->definition || !node->has_gimple_body_p ())
+    return NULL_TREE;
+
+  /* Nothing to do if the function already has the "omp declare simd"
+     attribute, is marked noclone, or is not "omp declare target".  */
+  if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl))
+      || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl))
+      || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
+    return NULL_TREE;
+
+  /* Filter on the return type first.  */
+  if (!auto_simd_type_ok_p (TREE_TYPE (TREE_TYPE (decl))))
+    return NULL_TREE;
+
+  /* Then on the argument types: taken from the function type when it
+     records them, otherwise from the parameter decls.  */
+  if (TYPE_ARG_TYPES (TREE_TYPE (decl)))
+    {
+      for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (decl));
+           arg; arg = TREE_CHAIN (arg))
+        if (!auto_simd_type_ok_p (TREE_VALUE (arg)))
+          return NULL_TREE;
+    }
+  else
+    {
+      for (tree parm = DECL_ARGUMENTS (decl); parm; parm = DECL_CHAIN (parm))
+        if (!auto_simd_type_ok_p (TREE_TYPE (parm)))
+          return NULL_TREE;
+    }
+
+  /* Scan the function body to see if it is suitable for SIMD-ization.  */
+  node->get_body ();
+
+  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+           gsi_next (&gsi))
+        if (!auto_simd_check_stmt (gsi_stmt (gsi), decl))
+          return NULL_TREE;
+    }
+
+  /* All is good: prepend a clause-less "omp declare simd" attribute.  */
+  tree result = tree_cons (get_identifier ("omp declare simd"), NULL_TREE,
+                           DECL_ATTRIBUTES (decl));
+  DECL_ATTRIBUTES (decl) = result;
+  return result;
+}
+
/* Return the number of elements in vector type VECTYPE, which is associated
with a SIMD clone. At present these always have a constant length. */
return get_identifier (str);
}
-/* Create a simd clone of OLD_NODE and return it. */
+/* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
+ create it as a local symbol, otherwise copy the symbol linkage and
+ visibility attributes from OLD_NODE. */
static struct cgraph_node *
-simd_clone_create (struct cgraph_node *old_node)
+simd_clone_create (struct cgraph_node *old_node, bool force_local)
{
struct cgraph_node *new_node;
if (old_node->definition)
return new_node;
set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
- TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
- DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
- DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
- DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
- DECL_VISIBILITY_SPECIFIED (new_node->decl)
- = DECL_VISIBILITY_SPECIFIED (old_node->decl);
- DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
- DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
- if (DECL_ONE_ONLY (old_node->decl))
- make_decl_one_only (new_node->decl, DECL_ASSEMBLER_NAME (new_node->decl));
-
- /* The method cgraph_version_clone_with_body () will force the new
- symbol local. Undo this, and inherit external visibility from
- the old node. */
- new_node->local = old_node->local;
- new_node->externally_visible = old_node->externally_visible;
- new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt;
+ if (force_local)
+ {
+ TREE_PUBLIC (new_node->decl) = 0;
+ DECL_COMDAT (new_node->decl) = 0;
+ DECL_WEAK (new_node->decl) = 0;
+ DECL_EXTERNAL (new_node->decl) = 0;
+ DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0;
+ DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT;
+ DECL_DLLIMPORT_P (new_node->decl) = 0;
+ }
+ else
+ {
+ TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+ DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
+ DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
+ DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
+ DECL_VISIBILITY_SPECIFIED (new_node->decl)
+ = DECL_VISIBILITY_SPECIFIED (old_node->decl);
+ DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
+ DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
+ if (DECL_ONE_ONLY (old_node->decl))
+ make_decl_one_only (new_node->decl,
+ DECL_ASSEMBLER_NAME (new_node->decl));
+
+ /* The method cgraph_version_clone_with_body () will force the new
+ symbol local. Undo this, and inherit external visibility from
+ the old node. */
+ new_node->local = old_node->local;
+ new_node->externally_visible = old_node->externally_visible;
+ new_node->calls_declare_variant_alt
+ = old_node->calls_declare_variant_alt;
+ }
return new_node;
}
void
expand_simd_clones (struct cgraph_node *node)
{
- tree attr = lookup_attribute ("omp declare simd",
- DECL_ATTRIBUTES (node->decl));
- if (attr == NULL_TREE
- || node->inlined_to
+ tree attr;
+ bool explicit_p = true;
+
+ if (node->inlined_to
|| lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
return;
+ attr = lookup_attribute ("omp declare simd",
+ DECL_ATTRIBUTES (node->decl));
+
+ /* See if we can add an "omp declare simd" directive implicitly
+ before giving up. */
+ /* FIXME: OpenACC "#pragma acc routine" translates into
+ "omp declare target", but appears also to have some other effects
+ that conflict with generating SIMD clones, causing ICEs. So don't
+ do this if we've got OpenACC instead of OpenMP. */
+ if (attr == NULL_TREE
+ && flag_openmp_target_simd_clone
+ && !oacc_get_fn_attrib (node->decl))
+ {
+ attr = mark_auto_simd_clone (node);
+ explicit_p = false;
+ }
+ if (attr == NULL_TREE)
+ return;
+
/* Ignore
#pragma omp declare simd
extern int foo ();
poly_uint64 orig_simdlen = clone_info->simdlen;
tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+
/* The target can return 0 (no simd clones should be created),
1 (just one ISA of simd clones should be created) or higher
count of ISA variants. In that case, clone_info is initialized
for the first ISA variant. */
int count
= targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
- base_type, 0);
+ base_type, 0,
+ explicit_p);
if (count == 0)
continue;
/* And call the target hook again to get the right ISA. */
targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
base_type,
- i / 2);
+ i / 2,
+ explicit_p);
if ((i & 1) != 0)
clone->inbranch = 1;
}
/* Only when we are sure we want to create the clone actually
clone the function (or definitions) or create another
extern FUNCTION_DECL (for prototypes without definitions). */
- struct cgraph_node *n = simd_clone_create (node);
+ struct cgraph_node *n = simd_clone_create (node, !explicit_p);
if (n == NULL)
{
if (i == 0)
REORDER_BLOCKS_ALGORITHM_STC },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_slp_vectorize, NULL, 1 },
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fopenmp_target_simd_clone, NULL, 1 },
#ifdef INSN_SCHEDULING
/* Only run the pre-regalloc scheduling pass if optimizing for speed. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fschedule_insns, NULL, 1 },
not determined by the bitsize (in which case @var{simdlen} is always used).\n\
The hook should return 0 if SIMD clones shouldn't be emitted,\n\
or number of @var{vecsize_mangle} variants that should be emitted.",
-int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
+int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int, bool), NULL)
DEFHOOK
(adjust,
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+
+/* Test that simd clones are generated for functions with "declare target".
+   -fopenmp-target-simd-clone is not passed explicitly: the test relies on
+   the option being enabled by default at -O2.  */
+
+#pragma omp declare target
+int addit(int a, int b, int c)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+/* Although addit has external linkage, we expect clones to be generated as
+   for a function with internal linkage: the clone symbols must be emitted
+   as functions, but no .globl directive may be emitted for them.  */
+
+/* { dg-final { scan-assembler "\\.type.*_ZGVbN4vvv_addit,.*function" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-assembler "\\.type.*_ZGVbM4vvv_addit,.*function" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-assembler-not "\\.globl.*_ZGVbN4vvv_addit" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-assembler-not "\\.globl.*_ZGVbM4vvv_addit" { target i?86-*-* x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+
+/* Test that simd clones are not generated for functions with
+   "declare target" but unsuitable arguments.  Here the only argument
+   is a struct passed by value; no symbol matching a clone of addit may
+   appear in the assembly.  */
+
+struct s {
+ int a;
+ int b;
+};
+
+#pragma omp declare target
+int addit (struct s x)
+{
+ return x.a + x.b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+
+/* Test that simd clones are not generated for functions with
+   "declare target" but that call possibly side-effecting functions
+   in the body.  The callee f is only declared here, with no const,
+   pure or "declare simd" annotation, so nothing is known about what
+   the call does.  */
+
+extern int f (int);
+
+#pragma omp declare target
+int addit(int a, int b, int c)
+{
+ return f(a) + b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
+
--- /dev/null
+/* { dg-options "-fopenmp -O2" } */
+
+/* Test that simd clones are not generated for functions with
+   "declare target" but that write memory in the body.  The write is
+   the store to the external variable save.  */
+
+extern int save;
+
+#pragma omp declare target
+int addit(int a, int b, int c)
+{
+ save = c;
+ return a + b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -Os" } */
+
+/* Test that simd clones are not generated for functions with
+   "declare target" at -Os.  The function itself is one that does get
+   clones at -O2; -fopenmp-target-simd-clone is only enabled by default
+   at -O2 and higher when optimizing for speed.  */
+
+#pragma omp declare target
+int addit(int a, int b, int c)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
--- /dev/null
+/* { dg-options "-fopenmp -Og" } */
+
+/* Test that simd clones are not generated for functions with
+   "declare target" at -Og.  The function itself is one that does get
+   clones at -O2; -fopenmp-target-simd-clone is only enabled by default
+   at -O2 and higher when optimizing for speed.  */
+
+#pragma omp declare target
+int addit(int a, int b, int c)
+{
+ return a + b;
+}
+#pragma omp end declare target
+
+/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */