#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
+#include "symbol-summary.h"
+#include "ipa-prop.h"
+#include "ipa-fnsummary.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
+
+#undef TARGET_NEED_IPA_FN_TARGET_INFO
+#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
+
+#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
+#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
\f
/* Processor table. */
return resolver;
}
-\f
+/* Implement TARGET_NEED_IPA_FN_TARGET_INFO.  Decide whether the gimple
+   statements of DECL have to be scanned to collect target specific
+   information for inlining.  INFO could receive RS6000_FN_TARGET_INFO_*
+   bits that are already predictable at this point, but it is left
+   untouched here.  Return true when a scan is needed, false otherwise.  */
+
+static bool
+rs6000_need_ipa_fn_target_info (const_tree decl,
+                                unsigned int &info ATTRIBUTE_UNUSED)
+{
+  /* Use the function's own target options, falling back to the default
+     option node when the function carries none.  */
+  tree fn_target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
+  if (fn_target == NULL_TREE)
+    fn_target = target_option_default_node;
+  struct cl_target_option *fn_opts = TREE_TARGET_OPTION (fn_target);
+
+  /* See PR102059: HTM is the only feature handled for now, so the
+     subsequent scan is only worthwhile when HTM is enabled.  */
+  return (fn_opts->x_rs6000_isa_flags & OPTION_MASK_HTM) != 0;
+}
+
+/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO.  Inspect STMT and set the
+   matching RS6000_FN_TARGET_INFO_* bit in INFO when the statement may use
+   a tracked ISA feature.  Return false when no further statements need
+   to be scanned, true to keep scanning.  */
+
+static bool
+rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
+{
+  const enum gimple_code code = gimple_code (stmt);
+
+  /* Assume inline asm can use any instruction feature, so every bit we
+     are concerned with should be set; for now HTM is the only one.  */
+  if (code == GIMPLE_ASM)
+    {
+      info |= RS6000_FN_TARGET_INFO_HTM;
+      return false;
+    }
+
+  /* Apart from inline asm, only calls are examined.  */
+  if (code != GIMPLE_CALL)
+    return true;
+
+  /* Among calls, only machine-dependent built-ins are of interest.  */
+  tree callee_decl = gimple_call_fndecl (stmt);
+  if (!callee_decl || !fndecl_built_in_p (callee_decl, BUILT_IN_MD))
+    return true;
+
+  /* HTM bifs definitely exploit HTM insns.  */
+  const enum rs6000_gen_builtins bif_code
+    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (callee_decl);
+  if (!bif_is_htm (rs6000_builtin_info[bif_code]))
+    return true;
+
+  info |= RS6000_FN_TARGET_INFO_HTM;
+  return false;
+}
+
/* Hook to determine if one function can safely inline another. */
static bool
else
caller_isa = rs6000_isa_flags;
+ cgraph_node *callee_node = cgraph_node::get (callee);
+ if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
+ {
+ unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
+ if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
+ {
+ callee_isa &= ~OPTION_MASK_HTM;
+ explicit_isa &= ~OPTION_MASK_HTM;
+ }
+ }
+
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function
must not inline a vsx function. However, for those options that the
&& TARGET_P8_VECTOR \
&& TARGET_POWERPC64)
+/* Inlining lets each target define the meaning of the bits in the
+ target_info field of ipa_fn_summary on its own.  The bits used by
+ rs6000 are listed below.  RS6000_FN_TARGET_INFO_HTM marks a function
+ that may use HTM instructions. */
+#define RS6000_FN_TARGET_INFO_HTM 1
+
/* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default. */
specific target options and the caller does not use the same options.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_UPDATE_IPA_FN_TARGET_INFO (unsigned int& @var{info}, const gimple* @var{stmt})
+Allow target to analyze all gimple statements for the given function to
+record and update some target specific information for inlining. A typical
+example is that a caller with one isa feature disabled is normally not
+allowed to inline a callee with that same isa feature enabled even which is
+attributed by always_inline, but with the conservative analysis on all
+statements of the callee if we are able to guarantee the callee does not
+exploit any instructions from the mismatch isa feature, it would be safe to
+allow the caller to inline the callee.
+@var{info} is one @code{unsigned int} value to record information in which
+one set bit indicates one corresponding feature is detected in the analysis,
+@var{stmt} is the statement being analyzed. Return true if target still
+need to analyze the subsequent statements, otherwise return false to stop
+subsequent analysis.
+The default version of this hook returns false.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_NEED_IPA_FN_TARGET_INFO (const_tree @var{decl}, unsigned int& @var{info})
+Allow target to check early whether it is necessary to analyze all gimple
+statements in the given function to update target specific information for
+inlining. See hook @code{update_ipa_fn_target_info} for usage example of
+target specific information. This hook is expected to be invoked ahead of
+the iterating with hook @code{update_ipa_fn_target_info}.
+@var{decl} is the function being analyzed, @var{info} is the same as what
+in hook @code{update_ipa_fn_target_info}, target can do one time update
+into @var{info} without iterating for some case. Return true if target
+decides to analyze all gimple statements to collect information, otherwise
+return false.
+The default version of this hook returns false.
+@end deftypefn
+
@deftypefn {Target Hook} void TARGET_RELAYOUT_FUNCTION (tree @var{fndecl})
This target hook fixes function @var{fndecl} after attributes are processed.
Default does nothing. On ARM, the default function's alignment is updated
@hook TARGET_CAN_INLINE_P
+@hook TARGET_UPDATE_IPA_FN_TARGET_INFO
+
+@hook TARGET_NEED_IPA_FN_TARGET_INFO
+
@hook TARGET_RELAYOUT_FUNCTION
@node Emulated TLS
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
fprintf (f, " calls:\n");
dump_ipa_call_summary (f, 4, node, s);
fprintf (f, "\n");
+ if (s->target_info)
+ fprintf (f, " target_info: %x\n", s->target_info);
}
else
fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
bb_predicate,
bb_predicate);
+ /* Only look for target information for inlinable functions. */
+ bool scan_for_target_info =
+ info->inlinable
+ && targetm.target_option.need_ipa_fn_target_info (node->decl,
+ info->target_info);
+
if (fbi.info)
compute_bb_predicates (&fbi, node, info, params_summary);
const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
}
}
+ /* For target specific information, we want to scan all statements
+ rather than only those with non-zero weights, so that we do not
+ miss anything interesting for target information, such as
+ internal function calls. */
+ if (scan_for_target_info)
+ scan_for_target_info =
+ targetm.target_option.update_ipa_fn_target_info
+ (info->target_info, stmt);
+
/* Account cost of address calculations in the statements. */
for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
{
toplev_predicate = true;
info->fp_expressions |= callee_info->fp_expressions;
+ info->target_info |= callee_info->target_info;
if (callee_info->conds)
{
bp = streamer_read_bitpack (&ib);
if (info)
{
- info->inlinable = bp_unpack_value (&bp, 1);
- info->fp_expressions = bp_unpack_value (&bp, 1);
+ info->inlinable = bp_unpack_value (&bp, 1);
+ info->fp_expressions = bp_unpack_value (&bp, 1);
+ if (!lto_stream_offload_p)
+ info->target_info = streamer_read_uhwi (&ib);
}
else
{
- bp_unpack_value (&bp, 1);
- bp_unpack_value (&bp, 1);
+ bp_unpack_value (&bp, 1);
+ bp_unpack_value (&bp, 1);
+ if (!lto_stream_offload_p)
+ streamer_read_uhwi (&ib);
}
count2 = streamer_read_uhwi (&ib);
bp_pack_value (&bp, info->inlinable, 1);
bp_pack_value (&bp, info->fp_expressions, 1);
streamer_write_bitpack (&bp);
+ if (!lto_stream_offload_p)
+ streamer_write_uhwi (ob, info->target_info);
streamer_write_uhwi (ob, vec_safe_length (info->conds));
for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
{
ipa_fn_summary ()
: min_size (0),
inlinable (false), single_caller (false),
- fp_expressions (false),
+ fp_expressions (false), target_info (0),
estimated_stack_size (false),
time (0), conds (NULL),
size_time_table (), call_size_time_table (vNULL),
: min_size (s.min_size),
inlinable (s.inlinable), single_caller (s.single_caller),
fp_expressions (s.fp_expressions),
+ target_info (s.target_info),
estimated_stack_size (s.estimated_stack_size),
time (s.time), conds (s.conds), size_time_table (),
call_size_time_table (vNULL),
unsigned int single_caller : 1;
/* True if function contains any floating point expressions. */
unsigned int fp_expressions : 1;
+ /* Like fp_expressions field above, but it's to hold some target specific
+ information, such as some target specific isa flags. Note that for
+ offloading target compilers, this field isn't streamed. */
+ unsigned int target_info;
/* Information about function that will result after applying all the
inline decisions present in the callgraph. Generally kept up to
bool, (tree caller, tree callee),
default_target_can_inline_p)
+DEFHOOK
+(update_ipa_fn_target_info,
+ "Allow target to analyze all gimple statements for the given function to\n\
+record and update some target specific information for inlining. A typical\n\
+example is that a caller with one isa feature disabled is normally not\n\
+allowed to inline a callee with that same isa feature enabled even which is\n\
+attributed by always_inline, but with the conservative analysis on all\n\
+statements of the callee if we are able to guarantee the callee does not\n\
+exploit any instructions from the mismatch isa feature, it would be safe to\n\
+allow the caller to inline the callee.\n\
+@var{info} is one @code{unsigned int} value to record information in which\n\
+one set bit indicates one corresponding feature is detected in the analysis,\n\
+@var{stmt} is the statement being analyzed. Return true if target still\n\
+need to analyze the subsequent statements, otherwise return false to stop\n\
+subsequent analysis.\n\
+The default version of this hook returns false.",
+ bool, (unsigned int& info, const gimple* stmt),
+ default_update_ipa_fn_target_info)
+
+DEFHOOK
+(need_ipa_fn_target_info,
+ "Allow target to check early whether it is necessary to analyze all gimple\n\
+statements in the given function to update target specific information for\n\
+inlining. See hook @code{update_ipa_fn_target_info} for usage example of\n\
+target specific information. This hook is expected to be invoked ahead of\n\
+the iterating with hook @code{update_ipa_fn_target_info}.\n\
+@var{decl} is the function being analyzed, @var{info} is the same as what\n\
+in hook @code{update_ipa_fn_target_info}, target can do one time update\n\
+into @var{info} without iterating for some case. Return true if target\n\
+decides to analyze all gimple statements to collect information, otherwise\n\
+return false.\n\
+The default version of this hook returns false.",
+ bool, (const_tree decl, unsigned int& info),
+ default_need_ipa_fn_target_info)
+
DEFHOOK
(relayout_function,
"This target hook fixes function @var{fndecl} after attributes are processed.\n\
return callee_opts == caller_opts;
}
+/* By default, return false: no target information needs to be collected
+ for inlining. A target maintainer should redefine the hook if the
+ target wants to take advantage of it. */
+
+bool
+default_need_ipa_fn_target_info (const_tree, unsigned int &)
+{
+ return false;
+}
+
+bool
+default_update_ipa_fn_target_info (unsigned int &, const gimple *)
+{
+ return false; /* Default: nothing to record, so stop the scan at once. */
+}
+
/* If the machine does not have a case insn that compares the bounds,
this means extra overhead for dispatch tables, which raises the
threshold for using them. */
extern bool default_target_option_valid_attribute_p (tree, tree, tree, int);
extern bool default_target_option_pragma_parse (tree, tree);
extern bool default_target_can_inline_p (tree, tree);
+extern bool default_update_ipa_fn_target_info (unsigned int &, const gimple *);
+extern bool default_need_ipa_fn_target_info (const_tree, unsigned int &);
extern bool default_valid_pointer_mode (scalar_int_mode);
extern bool default_ref_may_alias_errno (class ao_ref *);
extern scalar_int_mode default_addr_space_pointer_mode (addr_space_t);
--- /dev/null
+/* { dg-lto-do link } */
+/* { dg-skip-if "power10 and above only" { ! { power10_ok } } } */
+/* -Wno-attributes suppresses always_inline warnings.
+ foo1 is an always_inline function built with the power8 options below;
+ it adds 100 to *B and returns the updated value. */
+/* { dg-lto-options { "-O2 -mdejagnu-cpu=power8 -flto -Wno-attributes -mno-power8-fusion" } } */
+
+int __attribute__ ((always_inline))
+foo1 (int *b)
+{
+ *b += 100;
+ return *b;
+}
+
--- /dev/null
+extern int foo1 (int *b); /* Not defined in this file. */
+
+int __attribute__ ((always_inline)) foo2 (int *b) /* Forced-inline wrapper around foo1. */
+{
+ int res = foo1 (b);
+ *b += res; /* Accumulate foo1's result into *B. */
+ return *b;
+}
+
--- /dev/null
+extern int foo2 (int *b); /* Not defined in this file. */
+
+#pragma GCC target "cpu=power10"
+__attribute__ ((always_inline))
+int
+main (int *a) /* Caller: switched to power10 by the pragma above. */
+{
+ *a = foo2 (a); /* Store foo2's result back through A. */
+ return 0;
+}
+
--- /dev/null
+/* { dg-lto-do link } */
+/* { dg-skip-if "power10 and above only" { ! { power10_ok } } } */
+/* { dg-lto-options { "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -flto -fdump-ipa-inline" } } */
+
+int
+foo1 (int *b) /* Plain (not always_inline) callee: adds 100 to *B and returns it. */
+{
+ *b += 100;
+ return *b;
+}
+
+/* { dg-final { scan-wpa-ipa-dump-not "target specific option mismatch" "inline" } } */
--- /dev/null
+extern int foo1 (int *b); /* Not defined in this file. */
+
+int foo2 (int *b) /* Plain (not always_inline) wrapper around foo1. */
+{
+ int res = foo1 (b);
+ *b += res; /* Accumulate foo1's result into *B. */
+ return *b;
+}
+
--- /dev/null
+extern int foo2 (int *b); /* Not defined in this file. */
+
+#pragma GCC target "cpu=power10"
+int
+main (int *a) /* Caller: switched to power10 by the pragma above. */
+{
+ *a = foo2 (a); /* Store foo2's result back through A. */
+ return 0;
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* -Wno-attributes suppresses always_inline warnings. */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -Wno-attributes" } */
+
+/* Verify the reduced case from PR102059 won't fail: foo is always_inline
+ and built with the power8 options above, while its caller bar is
+ switched to power10 by the target pragma. */
+
+__attribute__ ((always_inline)) int
+foo (int *b)
+{
+ *b += 10;
+ return *b;
+}
+
+#pragma GCC target "cpu=power10"
+int
+bar (int *a)
+{
+ *a = foo (a);
+ return 0;
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-htm" } */
+
+/* Verify target info for inlining still works even if the callee
+ disables HTM explicitly while the caller enables it: foo is built
+ with -mno-htm from the options above, and bar turns HTM on through
+ the target pragma. */
+
+static inline int __attribute__ ((always_inline))
+foo (int *b)
+{
+ *b += 10;
+ return *b;
+}
+
+#pragma GCC target "htm"
+int
+bar (int *a)
+{
+ *a = foo (a);
+ return 0;
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -fdump-tree-einline-optimized" } */
+
+/* Like pr102059-1.c, but verify that the inlining still happens
+ even without the always_inline attribute.  The final scan below
+ expects exactly one "Inlining foo" line in the einline dump. */
+
+int foo (int *b)
+{
+ *b += 10;
+ return *b;
+}
+
+#pragma GCC target "cpu=power10"
+int
+bar (int *a)
+{
+ *a = foo (a);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {Inlining foo/[0-9]* } 1 "einline"} } */