Common Var(flag_omit_frame_pointer) Optimization
When possible do not generate stack frames.
-fopenmp-target-simd-clone
-Common Var(flag_openmp_target_simd_clone) Optimization
-Generate SIMD clones for functions with the OpenMP declare target directive.
-
fopt-info
Common Var(flag_opt_info) Optimization
Enable all optimization info dumps on stderr.
static int
aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num,
- bool explicit_p)
+ tree base_type, int num)
{
tree t, ret_type;
unsigned int elt_bits, count;
|| const_simdlen > 1024
|| (const_simdlen & (const_simdlen - 1)) != 0))
{
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", const_simdlen);
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", const_simdlen);
return 0;
}
if (TREE_CODE (ret_type) != VOID_TYPE
&& !currently_supported_simd_type (ret_type, base_type))
{
- if (!explicit_p)
- ;
- else if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
+ if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
&& !currently_supported_simd_type (arg_type, base_type))
{
- if (!explicit_p)
- ;
- else if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
+ if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support mixed size types "
"for %<simd%> functions");
if (clonei->simdlen.is_constant (&const_simdlen)
&& maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
{
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "GCC does not currently support simdlen %wd for "
- "type %qT",
- const_simdlen, base_type);
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "GCC does not currently support simdlen %wd for type %qT",
+ const_simdlen, base_type);
return 0;
}
}
gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node),
struct cgraph_simd_clone *clonei,
tree base_type,
- int ARG_UNUSED (num),
- bool explicit_p)
+ int ARG_UNUSED (num))
{
unsigned int elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
{
/* Note that x86 has a similar message that is likely to trigger on
sizes that are OK for gcn; the user can't win. */
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd (amdgcn)",
- clonei->simdlen.to_constant ());
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd (amdgcn)",
+ clonei->simdlen.to_constant ());
return 0;
}
static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
- tree base_type, int num,
- bool explicit_p)
+ tree base_type, int num)
{
int ret = 1;
|| clonei->simdlen > 1024
|| (clonei->simdlen & (clonei->simdlen - 1)) != 0))
{
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd", clonei->simdlen.to_constant ());
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd", clonei->simdlen.to_constant ());
return 0;
}
break;
/* FALLTHRU */
default:
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported return type %qT for simd", ret_type);
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd", ret_type);
return 0;
}
default:
if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
break;
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported argument type %qT for simd", arg_type);
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd", arg_type);
return 0;
}
}
- if (!TREE_PUBLIC (node->decl) || !explicit_p)
+ if (!TREE_PUBLIC (node->decl))
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
cnt /= clonei->vecsize_float;
if (cnt > (TARGET_64BIT ? 16 : 8))
{
- if (explicit_p)
- warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %wd",
- clonei->simdlen.to_constant ());
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %wd",
+ clonei->simdlen.to_constant ());
return 0;
}
}
-flax-vector-conversions -fms-extensions @gol
-foffload=@var{arg} -foffload-options=@var{arg} -foffload-memory=@var{arg} @gol
-fopenacc -fopenacc-dim=@var{geom} @gol
--fopenmp -fopenmp-simd -fopenmp-target-simd-clone @gol
+-fopenmp -fopenmp-simd @gol
-fpermitted-flt-eval-methods=@var{standard} @gol
-fplan9-extensions -fsigned-bitfields -funsigned-bitfields @gol
-fsigned-char -funsigned-char -fsso-struct=@var{endianness}}
@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++
and @code{!$omp} in Fortran. Other OpenMP directives are ignored.
-@item -fopenmp-target-simd-clone
-@opindex fopenmp-target-simd-clone
-@cindex OpenMP target SIMD clone
-In addition to generating SIMD clones for functions marked with the
-@code{declare simd} directive, GCC also generates clones
-for functions marked with the OpenMP @code{declare target} directive
-that are suitable for vectorization when this option is in effect.
-It is enabled by default at @option{-O2} and higher (but not @option{-Os}
-or @option{-Og}).
-
@item -fpermitted-flt-eval-methods=@var{style}
@opindex fpermitted-flt-eval-methods
@opindex fpermitted-flt-eval-methods=c11
this target than a sequence of elementwise loads or stores.
@end deftypevr
-@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@var{simdlen} field if it was previously 0.
#include "stringpool.h"
#include "attribs.h"
#include "omp-simd-clone.h"
-#include "omp-low.h"
-#include "omp-general.h"
-
-/* Helper function for mark_auto_simd_clone; return false if the statement
- violates restrictions for an "omp declare simd" function. Specifically,
- the function must not
- - throw or call setjmp/longjmp
- - write memory that could alias parallel calls
- - include openmp directives or calls
- - call functions that might do those things */
-
-static bool
-auto_simd_check_stmt (gimple *stmt, tree outer)
-{
- tree decl;
-
- switch (gimple_code (stmt))
- {
- case GIMPLE_CALL:
- decl = gimple_call_fndecl (stmt);
-
- /* We can't know whether indirect calls are safe. */
- if (decl == NULL_TREE)
- return false;
-
- /* Calls to functions that are CONST or PURE are ok. */
- if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
- break;
-
- /* Calls to functions that are already marked "omp declare simd" are
- OK. */
- if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl)))
- break;
-
- /* Let recursive calls to the current function through. */
- if (decl == outer)
- break;
-
- /* Other function calls are not permitted. */
- return false;
-
- /* OpenMP directives are not permitted. */
- CASE_GIMPLE_OMP:
- return false;
-
- /* Conservatively reject all EH-related constructs. */
- case GIMPLE_CATCH:
- case GIMPLE_EH_FILTER:
- case GIMPLE_EH_MUST_NOT_THROW:
- case GIMPLE_EH_ELSE:
- case GIMPLE_EH_DISPATCH:
- case GIMPLE_RESX:
- case GIMPLE_TRY:
- return false;
-
- /* Asms are not permitted since we don't know what they do. */
- case GIMPLE_ASM:
- return false;
-
- default:
- break;
- }
-
- /* Memory writes are not permitted.
- FIXME: this could be relaxed a little to permit writes to
- function-local variables that could not alias other instances
- of the function running in parallel. */
- if (gimple_store_p (stmt))
- return false;
- else
- return true;
-}
-
-/* If the function NODE appears suitable for auto-annotation with "declare
- simd", add and return such an attribute, otherwise return null. */
-
-static tree
-mark_auto_simd_clone (struct cgraph_node *node)
-{
- tree decl = node->decl;
- tree t;
- machine_mode m;
- tree result;
- basic_block bb;
-
- /* Nothing to do if the function isn't a definition or doesn't
- have a body. */
- if (!node->definition || !node->has_gimple_body_p ())
- return NULL_TREE;
-
- /* Nothing to do if the function already has the "omp declare simd"
- attribute, is marked noclone, or is not "omp declare target". */
- if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl))
- || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl))
- || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
- return NULL_TREE;
-
- /* Backends will check for vectorizable arguments/return types in a
- target-specific way, but we can immediately filter out functions
- that have non-scalar arguments/return types. Also, atomic types
- trigger warnings in simd_clone_clauses_extract. */
- t = TREE_TYPE (TREE_TYPE (decl));
- m = TYPE_MODE (t);
- if (!(VOID_TYPE_P (t) || is_a <scalar_mode> (m)) || TYPE_ATOMIC (t))
- return NULL_TREE;
-
- if (TYPE_ARG_TYPES (TREE_TYPE (decl)))
- {
- for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl));
- temp; temp = TREE_CHAIN (temp))
- {
- t = TREE_VALUE (temp);
- m = TYPE_MODE (t);
- if (!(VOID_TYPE_P (t) || is_a <scalar_mode> (m)) || TYPE_ATOMIC (t))
- return NULL_TREE;
- }
- }
- else
- {
- for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp))
- {
- t = TREE_TYPE (temp);
- m = TYPE_MODE (t);
- if (!(VOID_TYPE_P (t) || is_a <scalar_mode> (m)) || TYPE_ATOMIC (t))
- return NULL_TREE;
- }
- }
-
- /* Scan the function body to see if it is suitable for SIMD-ization. */
- node->get_body ();
-
- FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
- {
- for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
- gsi_next (&gsi))
- if (!auto_simd_check_stmt (gsi_stmt (gsi), decl))
- return NULL_TREE;
- }
-
- /* All is good. */
- result = tree_cons (get_identifier ("omp declare simd"), NULL,
- DECL_ATTRIBUTES (decl));
- DECL_ATTRIBUTES (decl) = result;
- return result;
-}
-
/* Return the number of elements in vector type VECTYPE, which is associated
with a SIMD clone. At present these always have a constant length. */
return get_identifier (str);
}
-/* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
- create it as a local symbol, otherwise copy the symbol linkage and
- visibility attributes from OLD_NODE. */
+/* Create a simd clone of OLD_NODE and return it. */
static struct cgraph_node *
-simd_clone_create (struct cgraph_node *old_node, bool force_local)
+simd_clone_create (struct cgraph_node *old_node)
{
struct cgraph_node *new_node;
if (old_node->definition)
return new_node;
set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
- if (force_local)
- {
- TREE_PUBLIC (new_node->decl) = 0;
- DECL_COMDAT (new_node->decl) = 0;
- DECL_WEAK (new_node->decl) = 0;
- DECL_EXTERNAL (new_node->decl) = 0;
- DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0;
- DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT;
- DECL_DLLIMPORT_P (new_node->decl) = 0;
- }
- else
- {
- TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
- DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
- DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
- DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
- DECL_VISIBILITY_SPECIFIED (new_node->decl)
- = DECL_VISIBILITY_SPECIFIED (old_node->decl);
- DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
- DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
- if (DECL_ONE_ONLY (old_node->decl))
- make_decl_one_only (new_node->decl,
- DECL_ASSEMBLER_NAME (new_node->decl));
-
- /* The method cgraph_version_clone_with_body () will force the new
- symbol local. Undo this, and inherit external visibility from
- the old node. */
- new_node->local = old_node->local;
- new_node->externally_visible = old_node->externally_visible;
- new_node->calls_declare_variant_alt
- = old_node->calls_declare_variant_alt;
- }
+ TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+ DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
+ DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
+ DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
+ DECL_VISIBILITY_SPECIFIED (new_node->decl)
+ = DECL_VISIBILITY_SPECIFIED (old_node->decl);
+ DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
+ DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
+ if (DECL_ONE_ONLY (old_node->decl))
+ make_decl_one_only (new_node->decl, DECL_ASSEMBLER_NAME (new_node->decl));
+
+ /* The method cgraph_version_clone_with_body () will force the new
+ symbol local. Undo this, and inherit external visibility from
+ the old node. */
+ new_node->local = old_node->local;
+ new_node->externally_visible = old_node->externally_visible;
+ new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt;
return new_node;
}
void
expand_simd_clones (struct cgraph_node *node)
{
- tree attr;
- bool explicit_p = true;
-
- if (node->inlined_to
- || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
- return;
-
- attr = lookup_attribute ("omp declare simd",
- DECL_ATTRIBUTES (node->decl));
-
- /* See if we can add an "omp declare simd" directive implicitly
- before giving up. */
- /* FIXME: OpenACC "#pragma acc routine" translates into
- "omp declare target", but appears also to have some other effects
- that conflict with generating SIMD clones, causing ICEs. So don't
- do this if we've got OpenACC instead of OpenMP. */
+ tree attr = lookup_attribute ("omp declare simd",
+ DECL_ATTRIBUTES (node->decl));
if (attr == NULL_TREE
- && flag_openmp_target_simd_clone
- && !oacc_get_fn_attrib (node->decl))
- {
- attr = mark_auto_simd_clone (node);
- explicit_p = false;
- }
- if (attr == NULL_TREE)
+ || node->inlined_to
+ || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
return;
/* Ignore
poly_uint64 orig_simdlen = clone_info->simdlen;
tree base_type = simd_clone_compute_base_data_type (node, clone_info);
-
/* The target can return 0 (no simd clones should be created),
1 (just one ISA of simd clones should be created) or higher
count of ISA variants. In that case, clone_info is initialized
for the first ISA variant. */
int count
= targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
- base_type, 0,
- explicit_p);
+ base_type, 0);
if (count == 0)
continue;
/* And call the target hook again to get the right ISA. */
targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
base_type,
- i / 2,
- explicit_p);
+ i / 2);
if ((i & 1) != 0)
clone->inbranch = 1;
}
/* Only when we are sure we want to create the clone actually
clone the function (or definitions) or create another
extern FUNCTION_DECL (for prototypes without definitions). */
- struct cgraph_node *n = simd_clone_create (node, !explicit_p);
+ struct cgraph_node *n = simd_clone_create (node);
if (n == NULL)
{
if (i == 0)
REORDER_BLOCKS_ALGORITHM_STC },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_slp_vectorize, NULL, 1 },
- { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fopenmp_target_simd_clone, NULL, 1 },
#ifdef INSN_SCHEDULING
/* Only run the pre-regalloc scheduling pass if optimizing for speed. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fschedule_insns, NULL, 1 },
not determined by the bitsize (in which case @var{simdlen} is always used).\n\
The hook should return 0 if SIMD clones shouldn't be emitted,\n\
or number of @var{vecsize_mangle} variants that should be emitted.",
-int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int, bool), NULL)
+int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
DEFHOOK
(adjust,
+++ /dev/null
-/* { dg-options "-fopenmp -O2" } */
-
-/* Test that simd clones are generated for functions with "declare target". */
-
-#pragma omp declare target
-int addit(int a, int b, int c)
-{
- return a + b;
-}
-#pragma omp end declare target
-
-/* Although addit has external linkage, we expect clones to be generated as
- for a function with internal linkage. */
-
-/* { dg-final { scan-assembler "\\.type.*_ZGVbN4vvv_addit,.*function" { target i?86-*-* x86_64-*-* } } } */
-/* { dg-final { scan-assembler "\\.type.*_ZGVbM4vvv_addit,.*function" { target i?86-*-* x86_64-*-* } } } */
-/* { dg-final { scan-assembler-not "\\.globl.*_ZGVbN4vvv_addit" { target i?86-*-* x86_64-*-* } } } */
-/* { dg-final { scan-assembler-not "\\.globl.*_ZGVbM4vvv_addit" { target i?86-*-* x86_64-*-* } } } */
+++ /dev/null
-/* { dg-options "-fopenmp -O2" } */
-
-/* Test that simd clones are not generated for functions with
- "declare target" but unsuitable arguments. */
-
-struct s {
- int a;
- int b;
-};
-
-#pragma omp declare target
-int addit (struct s x)
-{
- return x.a + x.b;
-}
-#pragma omp end declare target
-
-/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
+++ /dev/null
-/* { dg-options "-fopenmp -O2" } */
-
-/* Test that simd clones are not generated for functions with
- "declare target" but that call possibly side-effecting functions
- in the body. */
-
-extern int f (int);
-
-#pragma omp declare target
-int addit(int a, int b, int c)
-{
- return f(a) + b;
-}
-#pragma omp end declare target
-
-/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
-
+++ /dev/null
-/* { dg-options "-fopenmp -O2" } */
-
-/* Test that simd clones are not generated for functions with
- "declare target" but that write memory in the body. */
-
-extern int save;
-
-#pragma omp declare target
-int addit(int a, int b, int c)
-{
- save = c;
- return a + b;
-}
-#pragma omp end declare target
-
-/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
+++ /dev/null
-/* { dg-options "-fopenmp -Os" } */
-
-/* Test that simd clones are not generated for functions with
- "declare target" at -Os. */
-
-#pragma omp declare target
-int addit(int a, int b, int c)
-{
- return a + b;
-}
-#pragma omp end declare target
-
-/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */
+++ /dev/null
-/* { dg-options "-fopenmp -Og" } */
-
-/* Test that simd clones are not generated for functions with
- "declare target" at -Og. */
-
-#pragma omp declare target
-int addit(int a, int b, int c)
-{
- return a + b;
-}
-#pragma omp end declare target
-
-/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */