Adds an optimisation in FMV to redirect to a specific target if possible.
A call is redirected to a specific target if both:
- the caller can always call the callee version
- and, it is possible to rule out all higher priority versions of the callee's
FMV set. That is established either by the callee being the highest priority
version, or by each higher priority version of the callee implying that, were
it resolved, a higher priority version of the caller would have been selected.
For this logic, introduces the new TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A
hook. Adds a full implementation for AArch64, and a weaker default version
for other targets.
This allows the target to replace the previous optimisation as the new one is
able to cover the same case where two function sets implement the same versions.
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_functions_b_resolvable_from_a): New
function.
(TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A): New define.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in: Add documentation for
TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A.
* multiple_target.cc (redirect_to_specific_clone): Add new optimisation
logic.
(ipa_target_clone): Remove check for TARGET_HAS_FMV_TARGET_ATTRIBUTE.
* target.def: Document new hook.
* attribs.cc (functions_b_resolvable_from_a): New function.
* attribs.h (functions_b_resolvable_from_a): New declaration.
gcc/testsuite/ChangeLog:
* g++.target/aarch64/fmv-selection1.C: New test.
* g++.target/aarch64/fmv-selection2.C: New test.
* g++.target/aarch64/fmv-selection3.C: New test.
* g++.target/aarch64/fmv-selection4.C: New test.
* g++.target/aarch64/fmv-selection5.C: New test.
* g++.target/aarch64/fmv-selection6.C: New test.
* g++.target/aarch64/fmv-selection7.C: New test.
return attr;
}
+/* Default implementation of TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A.
+ Conservative check of whether DECL_B can be resolved from DECL_A: look up
+ the FMV attribute (target when TARGET_HAS_FMV_TARGET_ATTRIBUTE is set,
+ target_version otherwise) on both declarations and return true only if
+ DECL_A has it with a value equal to DECL_B's. DECL_B is asserted to
+ carry the attribute; a DECL_A without it yields false. BASE is not
+ used. */
+
+bool
+functions_b_resolvable_from_a (tree decl_a, tree decl_b,
+ tree base ATTRIBUTE_UNUSED)
+{
+ const char *attr_name = TARGET_HAS_FMV_TARGET_ATTRIBUTE
+ ? "target"
+ : "target_version";
+
+ tree attr_a = lookup_attribute (attr_name, DECL_ATTRIBUTES (decl_a));
+ tree attr_b = lookup_attribute (attr_name, DECL_ATTRIBUTES (decl_b));
+
+ gcc_assert (attr_b);
+ if (!attr_a)
+ return false;
+
+ return attribute_value_equal (attr_a, attr_b);
+}
+
/* Comparator function to be used in qsort routine to sort attribute
specification strings to "target". */
extern char *sorted_attr_string (tree);
extern tree make_dispatcher_decl (const tree);
extern bool is_function_default_version (const tree);
+extern bool functions_b_resolvable_from_a (tree, tree, tree);
extern void handle_ignored_attributes_option (vec<char *> *);
/* Return a type like TTYPE except that its TYPE_ATTRIBUTES
return compare_feature_masks (mask1, mask2);
}
+/* Implement TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A.
+ Seed the ISA flag sets for DECL_A and DECL_B with the ISA flags of
+ BASELINE, hand each set to aarch64_parse_fmv_features together with the
+ declaration's target version (when that version is valid), and return
+ true only if DECL_B's resulting set has no bit missing from DECL_A's. */
+
+bool
+aarch64_functions_b_resolvable_from_a (tree decl_a, tree decl_b, tree baseline)
+{
+ auto baseline_isa = aarch64_get_isa_flags
+ (TREE_TARGET_OPTION (aarch64_fndecl_options (baseline)));
+ auto isa_a = baseline_isa;
+ auto isa_b = baseline_isa;
+
+ auto a_version = get_target_version (decl_a);
+ auto b_version = get_target_version (decl_b);
+ if (a_version.is_valid ())
+ aarch64_parse_fmv_features (a_version, &isa_a, NULL, NULL);
+ if (b_version.is_valid ())
+ aarch64_parse_fmv_features (b_version, &isa_b, NULL, NULL);
+
+ /* DECL_B is not resolvable if it needs any ISA bit that DECL_A lacks. */
+ if (isa_b & (~isa_a))
+ return false;
+
+ return true;
+}
+
/* Build the struct __ifunc_arg_t type:
struct __ifunc_arg_t
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY aarch64_compare_version_priority
+#undef TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A
+#define TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A \
+ aarch64_functions_b_resolvable_from_a
+
#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
aarch64_generate_version_dispatcher_body
@var{fn1} and @var{fn2} imply the same function version.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A (tree @var{decl_a}, tree @var{decl_v}, tree @var{base})
+@var{decl_b} is a function declaration with a function multi-versioning
+(FMV) attribute; this attribute is either @code{target} or
+@code{target_version}, depending on @code{TARGET_HAS_FMV_TARGET_ATTRIBUTE}.
+@var{decl_a} is a function declaration that may or may not have an FMV
+attribute.
+
+Return true if we have enough information to determine that the
+requirements of @var{decl_b}'s FMV attribute are met whenever @var{decl_a}
+is executed, given that the target supports all features required by
+function declaration @var{base}.
+
+The default implementation just checks whether @var{decl_a} has the same
+FMV attribute as @var{decl_b}. This is conservatively correct,
+but ports can do better by taking the relationships between architecture
+features into account. For example, on AArch64, @code{sve} is present
+whenever @code{sve2} is present.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree @var{callee})
This target hook returns @code{false} if the @var{caller} function
cannot inline @var{callee}, based on target specific information. By
@hook TARGET_OPTION_SAME_FUNCTION_VERSIONS
+@hook TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A
+
@hook TARGET_CAN_INLINE_P
@hook TARGET_UPDATE_IPA_FN_TARGET_INFO
return true;
}
-/* When NODE is a target clone, consider all callees and redirect
- to a clone with equal target attributes. That prevents multiple
- multi-versioning dispatches and a call-chain can be optimized.
-
- This optimisation might pick the wrong version in some cases, since knowing
- that we meet the target requirements for a matching callee version does not
- tell us that we won't also meet the target requirements for a higher
- priority callee version at runtime. Since this is longstanding behaviour
- for x86 and powerpc, we preserve it for those targets, but skip the optimisation
- for targets that use the "target_version" attribute for multi-versioning. */
+/* Consider every call NODE makes to a dispatched (FMV) symbol. If the call
+ can be proven to always resolve to one particular version of the callee,
+ redirect the call to that version directly. */
static void
redirect_to_specific_clone (cgraph_node *node)
{
- cgraph_function_version_info *fv = node->function_version ();
- if (fv == NULL)
- return;
-
- gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE);
- tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl));
- if (attr_target == NULL_TREE)
+ if (!targetm.compare_version_priority || !optimize)
return;
/* We need to remember NEXT_CALLER as it could be modified in the loop. */
for (cgraph_edge *e = node->callees; e ; e = e->next_callee)
{
- cgraph_function_version_info *fv2 = e->callee->function_version ();
- if (!fv2)
+ /* Only if this is a call to a dispatched symbol. */
+ if (!e->callee->dispatcher_function)
continue;
- tree attr_target2 = lookup_attribute ("target",
- DECL_ATTRIBUTES (e->callee->decl));
+ cgraph_function_version_info *callee_v
+ = e->callee->function_version ();
+ cgraph_function_version_info *caller_v
+ = e->caller->function_version ();
+
+ gcc_assert (callee_v);
- /* Function is not calling proper target clone. */
- if (attr_target2 == NULL_TREE
- || !attribute_value_equal (attr_target, attr_target2))
+ /* Find the default nodes for both callee and caller (if present). */
+ cgraph_function_version_info *callee_default_v = callee_v->next;
+ cgraph_function_version_info *caller_default_v = caller_v;
+ if (caller_v)
{
- while (fv2->prev != NULL)
- fv2 = fv2->prev;
+ while (caller_default_v->prev)
+ caller_default_v = caller_default_v->prev;
+ if (!is_function_default_version (caller_default_v->this_node->decl))
+ caller_default_v = NULL;
+ }
+
+ /* If this is not the TU that contains the definition of the default
+ version we are not guaranteed to have visibility of all versions
+ so cannot reason about them. */
+ if (!callee_default_v
+ || !callee_default_v->this_node->binds_to_current_def_p ())
+ continue;
+
+ cgraph_function_version_info *highest_callable_fn = NULL;
+ for (cgraph_function_version_info *ver = callee_v->next;
+ ver;
+ ver = ver->next)
+ if (targetm.target_option.functions_b_resolvable_from_a
+ (node->decl, ver->this_node->decl, node->decl))
+ highest_callable_fn = ver;
- /* Try to find a clone with equal target attribute. */
- for (; fv2 != NULL; fv2 = fv2->next)
+ if (!highest_callable_fn)
+ continue;
+
+ bool inlinable = true;
+
+ /* If there are higher priority versions of callee and caller has no
+ more version information, then not callable. */
+ if (highest_callable_fn->next)
+ {
+ /* If this is not the TU where the caller default is defined then
+ we cannot reason about the caller versions. */
+ if (!caller_default_v
+ || !caller_default_v->this_node->binds_to_current_def_p ())
+ continue;
+
+ /* If every higher priority version would imply a higher priority
+ version of caller would have been selected, then this is
+ callable. */
+ for (cgraph_function_version_info *callee_ver
+ = highest_callable_fn->next;
+ callee_ver; callee_ver = callee_ver->next)
{
- cgraph_node *callee = fv2->this_node;
- attr_target2 = lookup_attribute ("target",
- DECL_ATTRIBUTES (callee->decl));
- if (attr_target2 != NULL_TREE
- && attribute_value_equal (attr_target, attr_target2))
+ bool is_possible = true;
+ for (cgraph_function_version_info *caller_ver = caller_v->next;
+ caller_ver; caller_ver = caller_ver->next)
+ if (targetm.target_option.functions_b_resolvable_from_a
+ (callee_ver->this_node->decl, caller_ver->this_node->decl,
+ node->decl))
+ {
+ is_possible = false;
+ break;
+ }
+ if (is_possible)
{
- e->redirect_callee (callee);
- cgraph_edge::redirect_call_stmt_to_callee (e);
+ inlinable = false;
break;
}
}
}
+ if (inlinable)
+ {
+ e->redirect_callee (highest_callable_fn->this_node);
+ cgraph_edge::redirect_call_stmt_to_callee (e);
+ }
}
}
for (unsigned i = 0; i < to_dispatch.length (); i++)
create_dispatcher_calls (to_dispatch[i]);
- if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
- FOR_EACH_FUNCTION (node)
- redirect_to_specific_clone (node);
+ FOR_EACH_FUNCTION (node)
+ redirect_to_specific_clone (node);
return 0;
}
bool, (string_slice fn1, string_slice fn2),
hook_stringslice_stringslice_unreachable)
+/* Return true if the requirements of DECL_B's FMV attribute are known to be
+ met whenever DECL_A is executed, given that the target supports all
+ features required by BASE. */
+DEFHOOK
+(functions_b_resolvable_from_a,
+ "@var{decl_b} is a function declaration with a function multi-versioning\n\
+(FMV) attribute; this attribute is either @code{target} or\n\
+@code{target_version}, depending on @code{TARGET_HAS_FMV_TARGET_ATTRIBUTE}.\n\
+@var{decl_a} is a function declaration that may or may not have an FMV\n\
+attribute.\n\
+\n\
+Return true if we have enough information to determine that the\n\
+requirements of @var{decl_b}'s FMV attribute are met whenever @var{decl_a}\n\
+is executed, given that the target supports all features required by\n\
+function declaration @var{base}.\n\
+\n\
+The default implementation just checks whether @var{decl_a} has the same\n\
+FMV attribute as @var{decl_b}. This is conservatively correct,\n\
+but ports can do better by taking the relationships between architecture\n\
+features into account. For example, on AArch64, @code{sve} is present\n\
+whenever @code{sve2} is present.",
+ bool, (tree decl_a, tree decl_v, tree base),
+ functions_b_resolvable_from_a)
+
/* Function to determine if one function can inline another function. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 1;
+}
+
+__attribute__((target_version("rng")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 2;
+}
+
+__attribute__((target_version("flagm")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 3;
+}
+
+__attribute__((target_version("rng+flagm")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 4;
+}
+
+int bar()
+{
+ return foo ();
+}
+
+/* Cannot optimize */
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a+rng+flagm" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 1;
+}
+
+__attribute__((target_version("rng")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 2;
+}
+
+__attribute__((target_version("flagm")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 3;
+}
+
+__attribute__((target_version("rng+flagm")))
+__attribute__((optimize("O0")))
+int foo ()
+{
+ return 4;
+}
+
+int bar()
+{
+ return foo ();
+}
+
+/* Can optimize to highest priority function */
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._MrngMflagm\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{ return 1; }
+
+__attribute__((target_version("rng")))
+int foo ();
+__attribute__((target_version("flagm")))
+int foo ();
+__attribute__((target_version("rng+flagm")))
+int foo ();
+
+__attribute__((target_version("rng+flagm")))
+int bar()
+{
+ return foo ();
+}
+
+/* Can optimize to highest priority function */
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._MrngMflagm\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{ return 1; }
+
+__attribute__((target_version("rng")))
+int foo ();
+__attribute__((target_version("flagm")))
+int foo ();
+__attribute__((target_version("rng+flagm")))
+int foo ();
+
+__attribute__((target_version("default")))
+int bar()
+{
+ return foo ();
+}
+
+__attribute__((target_version("rng")))
+int bar();
+
+__attribute__((target_version("flagm")))
+int bar();
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\.default\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{ return 1; }
+
+__attribute__((target_version("rng")))
+int foo ();
+__attribute__((target_version("flagm")))
+int foo ();
+__attribute__((target_version("rng+flagm")))
+int foo ();
+
+__attribute__((target_version("default")))
+int bar()
+{
+ return foo ();
+}
+
+__attribute__((target_version("flagm")))
+int bar();
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\.default\n" 0 } } */
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a+rng" } */
+
+__attribute__((target_version("default")))
+__attribute__((optimize("O0")))
+int foo ()
+{ return 1; }
+
+__attribute__((target_version("rng")))
+int foo ();
+__attribute__((target_version("flagm")))
+int foo ();
+__attribute__((target_version("rng+flagm")))
+int foo ();
+
+__attribute__((target_version("default")))
+int bar()
+{
+ return foo ();
+}
+
+__attribute__((target_version("flagm")))
+int bar();
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._Mrng\n" 1 } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -march=armv8-a" } */
+
+[[gnu::optimize("O0")]]
+[[gnu::target_version ("default")]]
+int bar () {
+ return 1;
+}
+
+[[gnu::optimize("O0")]]
+[[gnu::target ("+sve2")]]
+[[gnu::target_version ("sve")]]
+int bar ();
+
+[[gnu::target ("+sve")]]
+int foo () {
+ return bar();
+}
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z3barv\._Msve\n" 1 } } */
+
+[[gnu::target_version ("default")]]
+int bar2 () {
+ return 1;
+}
+
+[[gnu::target_version ("sve2")]]
+int bar2 ();
+
+[[gnu::target_version ("default")]]
+int foo2 ();
+
+[[gnu::target_version ("sve")]]
+[[gnu::target ("+sve2")]]
+int foo2 () {
+ return bar2();
+}
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z4bar2v\._Msve2\n" 1 } } */
+
+[[gnu::target_version ("default")]]
+int bar3 () {
+ return 1;
+}
+
+[[gnu::target_version ("sve")]]
+int bar3 ();
+
+[[gnu::target ("+rng")]]
+[[gnu::target_version ("sve2")]]
+int bar3 ();
+
+[[gnu::target_version ("default")]]
+int foo3 ();
+
+[[gnu::target_version ("sve")]]
+int foo3 () {
+ return bar3 ();
+}
+
+[[gnu::target_version ("sve2+rng")]]
+int foo3 ();
+
+/* { dg-final { scan-assembler-times "\n\tb\t_Z4bar3v\n" 1 } } */