git.ipfire.org Git - thirdparty/gcc.git/blobdiff - gcc/ipa-inline-transform.c
c++: Handle multiple aggregate overloads [PR95319].
[thirdparty/gcc.git] / gcc / ipa-inline-transform.c
index 5e848028d8520dfa0aa23fb7946e146cbe857d4b..e9e21cc029669ef6df504873b901dbdc47647f61 100644 (file)
@@ -1,5 +1,5 @@
 /* Callgraph transformations to handle inlining
-   Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   Copyright (C) 2003-2020 Free Software Foundation, Inc.
    Contributed by Jan Hubicka
 
 This file is part of GCC.
@@ -32,60 +32,45 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
-#include "tree.h"
-#include "langhooks.h"
-#include "intl.h"
-#include "coverage.h"
-#include "ggc.h"
-#include "tree-cfg.h"
-#include "vec.h"
-#include "hash-map.h"
-#include "is-a.h"
-#include "plugin-api.h"
-#include "hashtab.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "hard-reg-set.h"
-#include "input.h"
 #include "function.h"
-#include "ipa-ref.h"
-#include "cgraph.h"
+#include "tree.h"
 #include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "tree-cfg.h"
 #include "symbol-summary.h"
+#include "tree-vrp.h"
 #include "ipa-prop.h"
+#include "ipa-fnsummary.h"
 #include "ipa-inline.h"
 #include "tree-inline.h"
-#include "tree-pass.h"
+#include "function.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "ipa-utils.h"
 
 int ncalls_inlined;
 int nfunctions_inlined;
-bool speculation_removed;
 
-/* Scale frequency of NODE edges by FREQ_SCALE.  */
+/* Scale counts of NODE edges by NUM/DEN.  */
 
 static void
-update_noncloned_frequencies (struct cgraph_node *node,
-                             int freq_scale)
+update_noncloned_counts (struct cgraph_node *node, 
+                        profile_count num, profile_count den)
 {
   struct cgraph_edge *e;
 
-  /* We do not want to ignore high loop nest after freq drops to 0.  */
-  if (!freq_scale)
-    freq_scale = 1;
+  profile_count::adjust_for_ipa_scaling (&num, &den);
+
   for (e = node->callees; e; e = e->next_callee)
     {
-      e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
-      if (e->frequency > CGRAPH_FREQ_MAX)
-        e->frequency = CGRAPH_FREQ_MAX;
       if (!e->inline_failed)
-        update_noncloned_frequencies (e->callee, freq_scale);
+        update_noncloned_counts (e->callee, num, den);
+      e->count = e->count.apply_scale (num, den);
     }
   for (e = node->indirect_calls; e; e = e->next_callee)
-    {
-      e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
-      if (e->frequency > CGRAPH_FREQ_MAX)
-        e->frequency = CGRAPH_FREQ_MAX;
-    }
+    e->count = e->count.apply_scale (num, den);
+  node->count = node->count.apply_scale (num, den);
 }
 
 /* We removed or are going to remove the last call to NODE.
@@ -107,9 +92,12 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
     }
   /* FIXME: When address is taken of DECL_EXTERNAL function we still
      can remove its offline copy, but we would need to keep unanalyzed node in
-     the callgraph so references can point to it.  */
+     the callgraph so references can point to it.
+
+     Also for comdat group we can ignore references inside a group as we
+     want to prove the group as a whole to be dead.  */
   return (!node->address_taken
-         && node->can_remove_if_no_direct_calls_p ()
+         && node->can_remove_if_no_direct_calls_and_refs_p ()
          /* Inlining might enable more devirtualizing, so we want to remove
             those only after all devirtualizable virtual calls are processed.
             Lacking may edges in callgraph we just preserve them post
@@ -117,7 +105,7 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
          && (!DECL_VIRTUAL_P (node->decl)
              || !opt_for_fn (node->decl, flag_devirtualize))
          /* During early inlining some unanalyzed cgraph nodes might be in the
-            callgraph and they might reffer the function in question.  */
+            callgraph and they might refer to the function in question.  */
          && !cgraph_new_nodes.exists ());
 }
 
@@ -134,7 +122,7 @@ can_remove_node_now_p (struct cgraph_node *node, struct cgraph_edge *e)
 
   /* When we see same comdat group, we need to be sure that all
      items can be removed.  */
-  if (!node->same_comdat_group)
+  if (!node->same_comdat_group || !node->externally_visible)
     return true;
   for (next = dyn_cast<cgraph_node *> (node->same_comdat_group);
        next != node; next = dyn_cast<cgraph_node *> (next->same_comdat_group))
@@ -170,18 +158,17 @@ master_clone_with_noninline_clones_p (struct cgraph_node *node)
    By default the offline copy is removed, when it appears dead after inlining.
    UPDATE_ORIGINAL prevents this transformation.
    If OVERALL_SIZE is non-NULL, the size is updated to reflect the
-   transformation.
-   FREQ_SCALE specify the scaling of frequencies of call sites.  */
+   transformation.  */
 
 void
 clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
-                    bool update_original, int *overall_size, int freq_scale)
+                    bool update_original, int *overall_size)
 {
   struct cgraph_node *inlining_into;
   struct cgraph_edge *next;
 
-  if (e->caller->global.inlined_to)
-    inlining_into = e->caller->global.inlined_to;
+  if (e->caller->inlined_to)
+    inlining_into = e->caller->inlined_to;
   else
     inlining_into = e->caller;
 
@@ -189,7 +176,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
     {
       /* We may eliminate the need for out-of-line copy to be output.
         In that case just go ahead and re-use it.  This is not just an
-        memory optimization.  Making offline copy of fuction disappear
+        memory optimization.  Making offline copy of function disappear
         from the program will improve future decisions on inlining.  */
       if (!e->callee->callers->next_caller
          /* Recursive inlining never wants the master clone to
@@ -205,29 +192,31 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
             need small function inlining to register edge removal hook to
             maintain the priority queue.
 
-            For now we keep the ohter functions in the group in program until
+            For now we keep the other functions in the group in program until
             cgraph_remove_unreachable_functions gets rid of them.  */
-         gcc_assert (!e->callee->global.inlined_to);
-         e->callee->dissolve_same_comdat_group_list ();
-         if (e->callee->definition && !DECL_EXTERNAL (e->callee->decl))
+         gcc_assert (!e->callee->inlined_to);
+         e->callee->remove_from_same_comdat_group ();
+         if (e->callee->definition
+             && inline_account_function_p (e->callee))
            {
+             gcc_assert (!e->callee->alias);
              if (overall_size)
-               *overall_size -= inline_summaries->get (e->callee)->size;
+               *overall_size -= ipa_size_summaries->get (e->callee)->size;
              nfunctions_inlined++;
            }
          duplicate = false;
          e->callee->externally_visible = false;
-          update_noncloned_frequencies (e->callee, e->frequency);
+          update_noncloned_counts (e->callee, e->count, e->callee->count);
+
+         dump_callgraph_transformation (e->callee, inlining_into,
+                                        "inlining to");
        }
       else
        {
          struct cgraph_node *n;
 
-         if (freq_scale == -1)
-           freq_scale = e->frequency;
          n = e->callee->create_clone (e->callee->decl,
-                                      MIN (e->count, e->callee->count),
-                                      freq_scale,
+                                      e->count,
                                       update_original, vNULL, true,
                                       inlining_into,
                                       NULL);
@@ -236,22 +225,89 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
        }
     }
   else
-    e->callee->dissolve_same_comdat_group_list ();
+    e->callee->remove_from_same_comdat_group ();
 
-  e->callee->global.inlined_to = inlining_into;
+  e->callee->inlined_to = inlining_into;
 
   /* Recursively clone all bodies.  */
   for (e = e->callee->callees; e; e = next)
     {
       next = e->next_callee;
       if (!e->inline_failed)
-        clone_inlined_nodes (e, duplicate, update_original, overall_size, freq_scale);
+        clone_inlined_nodes (e, duplicate, update_original, overall_size);
+    }
+}
+
+/* Check all speculations in N and if any seem useless, resolve them.  When a
+   first edge is resolved, pop all edges from NEW_EDGES and insert them to
+   EDGE_SET.  Then remove each resolved edge from EDGE_SET, if it is there.  */
+
+static bool
+check_speculations_1 (cgraph_node *n, vec<cgraph_edge *> *new_edges,
+                     hash_set <cgraph_edge *> *edge_set)
+{
+  bool speculation_removed = false;
+  cgraph_edge *next;
+
+  for (cgraph_edge *e = n->callees; e; e = next)
+    {
+      next = e->next_callee;
       if (e->speculative && !speculation_useful_p (e, true))
        {
-         e->resolve_speculation (NULL);
+         while (new_edges && !new_edges->is_empty ())
+           edge_set->add (new_edges->pop ());
+         edge_set->remove (e);
+
+         cgraph_edge::resolve_speculation (e, NULL);
          speculation_removed = true;
        }
+      else if (!e->inline_failed)
+       speculation_removed |= check_speculations_1 (e->callee, new_edges,
+                                                    edge_set);
     }
+  return speculation_removed;
+}
+
+/* Push E to NEW_EDGES.  Called from hash_set traverse method, which
+   unfortunately means this function has to have external linkage, otherwise
+   the code will not compile with gcc 4.8.  */
+
+bool
+push_all_edges_in_set_to_vec (cgraph_edge * const &e,
+                             vec<cgraph_edge *> *new_edges)
+{
+  new_edges->safe_push (e);
+  return true;
+}
+
+/* Check all speculations in N and if any seem useless, resolve them and remove
+   them from NEW_EDGES.  */
+
+static bool
+check_speculations (cgraph_node *n, vec<cgraph_edge *> *new_edges)
+{
+  hash_set <cgraph_edge *> edge_set;
+  bool res = check_speculations_1 (n, new_edges, &edge_set);
+  if (!edge_set.is_empty ())
+    edge_set.traverse <vec<cgraph_edge *> *,
+                      push_all_edges_in_set_to_vec> (new_edges);
+  return res;
+}
+
+/* Mark all call graph edges coming out of NODE and all nodes that have been
+   inlined to it as in_polymorphic_cdtor.  */
+
+static void
+mark_all_inlined_calls_cdtor (cgraph_node *node)
+{
+  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
+    {
+      cs->in_polymorphic_cdtor = true;
+      if (!cs->inline_failed)
+       mark_all_inlined_calls_cdtor (cs->callee);
+    }
+  for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
+    cs->in_polymorphic_cdtor = true;
 }
 
 
@@ -260,7 +316,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
    indirect edges are discovered in the process, add them to NEW_EDGES, unless
    it is NULL. If UPDATE_OVERALL_SUMMARY is false, do not bother to recompute overall
    size of caller after inlining. Caller is required to eventually do it via
-   inline_update_overall_summary.
+   ipa_update_overall_fn_summary.
    If callee_removed is non-NULL, set it to true if we removed callee node.
 
    Return true iff any new callgraph edges were discovered as a
@@ -275,26 +331,131 @@ inline_call (struct cgraph_edge *e, bool update_original,
   int old_size = 0, new_size = 0;
   struct cgraph_node *to = NULL;
   struct cgraph_edge *curr = e;
+  bool comdat_local = e->callee->comdat_local_p ();
   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
   bool new_edges_found = false;
 
-#ifdef ENABLE_CHECKING
-  int estimated_growth = estimate_edge_growth (e);
+  int estimated_growth = 0;
+  if (! update_overall_summary)
+    estimated_growth = estimate_edge_growth (e);
+  /* This is used only for assert below.  */
+#if 0
   bool predicated = inline_edge_summary (e)->predicate != NULL;
 #endif
 
-  speculation_removed = false;
   /* Don't inline inlined edges.  */
   gcc_assert (e->inline_failed);
   /* Don't even think of inlining inline clone.  */
-  gcc_assert (!callee->global.inlined_to);
+  gcc_assert (!callee->inlined_to);
+
+  to = e->caller;
+  if (to->inlined_to)
+    to = to->inlined_to;
+  if (to->thunk.thunk_p)
+    {
+      struct cgraph_node *target = to->callees->callee;
+      thunk_expansion = true;
+      symtab->call_cgraph_removal_hooks (to);
+      if (in_lto_p)
+       to->get_untransformed_body ();
+      to->expand_thunk (false, true);
+      /* When thunk is instrumented we may have multiple callees.  */
+      for (e = to->callees; e && e->callee != target; e = e->next_callee)
+       ;
+      symtab->call_cgraph_insertion_hooks (to);
+      thunk_expansion = false;
+      gcc_assert (e);
+    }
+
 
   e->inline_failed = CIF_OK;
   DECL_POSSIBLY_INLINED (callee->decl) = true;
 
-  to = e->caller;
-  if (to->global.inlined_to)
-    to = to->global.inlined_to;
+  if (DECL_FUNCTION_PERSONALITY (callee->decl))
+    DECL_FUNCTION_PERSONALITY (to->decl)
+      = DECL_FUNCTION_PERSONALITY (callee->decl);
+
+  bool reload_optimization_node = false;
+  if (!opt_for_fn (callee->decl, flag_strict_aliasing)
+      && opt_for_fn (to->decl, flag_strict_aliasing))
+    {
+      struct gcc_options opts = global_options;
+
+      cl_optimization_restore (&opts, opts_for_fn (to->decl));
+      opts.x_flag_strict_aliasing = false;
+      if (dump_file)
+       fprintf (dump_file, "Dropping flag_strict_aliasing on %s\n",
+                to->dump_name ());
+      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
+        = build_optimization_node (&opts);
+      reload_optimization_node = true;
+    }
+
+  ipa_fn_summary *caller_info = ipa_fn_summaries->get (to);
+  ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
+  if (!caller_info->fp_expressions && callee_info->fp_expressions)
+    {
+      caller_info->fp_expressions = true;
+      if (opt_for_fn (callee->decl, flag_rounding_math)
+         != opt_for_fn (to->decl, flag_rounding_math)
+         || opt_for_fn (callee->decl, flag_trapping_math)
+            != opt_for_fn (to->decl, flag_trapping_math)
+         || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
+            != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
+         || opt_for_fn (callee->decl, flag_finite_math_only)
+            != opt_for_fn (to->decl, flag_finite_math_only)
+         || opt_for_fn (callee->decl, flag_signaling_nans)
+            != opt_for_fn (to->decl, flag_signaling_nans)
+         || opt_for_fn (callee->decl, flag_cx_limited_range)
+            != opt_for_fn (to->decl, flag_cx_limited_range)
+         || opt_for_fn (callee->decl, flag_signed_zeros)
+            != opt_for_fn (to->decl, flag_signed_zeros)
+         || opt_for_fn (callee->decl, flag_associative_math)
+            != opt_for_fn (to->decl, flag_associative_math)
+         || opt_for_fn (callee->decl, flag_reciprocal_math)
+            != opt_for_fn (to->decl, flag_reciprocal_math)
+         || opt_for_fn (callee->decl, flag_fp_int_builtin_inexact)
+            != opt_for_fn (to->decl, flag_fp_int_builtin_inexact)
+         || opt_for_fn (callee->decl, flag_errno_math)
+            != opt_for_fn (to->decl, flag_errno_math))
+       {
+         struct gcc_options opts = global_options;
+
+         cl_optimization_restore (&opts, opts_for_fn (to->decl));
+         opts.x_flag_rounding_math
+           = opt_for_fn (callee->decl, flag_rounding_math);
+         opts.x_flag_trapping_math
+           = opt_for_fn (callee->decl, flag_trapping_math);
+         opts.x_flag_unsafe_math_optimizations
+           = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
+         opts.x_flag_finite_math_only
+           = opt_for_fn (callee->decl, flag_finite_math_only);
+         opts.x_flag_signaling_nans
+           = opt_for_fn (callee->decl, flag_signaling_nans);
+         opts.x_flag_cx_limited_range
+           = opt_for_fn (callee->decl, flag_cx_limited_range);
+         opts.x_flag_signed_zeros
+           = opt_for_fn (callee->decl, flag_signed_zeros);
+         opts.x_flag_associative_math
+           = opt_for_fn (callee->decl, flag_associative_math);
+         opts.x_flag_reciprocal_math
+           = opt_for_fn (callee->decl, flag_reciprocal_math);
+         opts.x_flag_fp_int_builtin_inexact
+           = opt_for_fn (callee->decl, flag_fp_int_builtin_inexact);
+         opts.x_flag_errno_math
+           = opt_for_fn (callee->decl, flag_errno_math);
+         if (dump_file)
+           fprintf (dump_file, "Copying FP flags from %s to %s\n",
+                    callee->dump_name (), to->dump_name ());
+         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
+            = build_optimization_node (&opts);
+         reload_optimization_node = true;
+       }
+    }
+
+  /* Reload global optimization flags.  */
+  if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
+    set_cfun (cfun, true);
 
   /* If aliases are involved, redirect edge to the actual destination and
      possibly remove the aliases.  */
@@ -305,7 +466,8 @@ inline_call (struct cgraph_edge *e, bool update_original,
       while (alias && alias != callee)
        {
          if (!alias->callers
-             && can_remove_node_now_p (alias, e))
+             && can_remove_node_now_p (alias,
+                                       !e->next_caller && !e->prev_caller ? e : NULL))
            {
              next_alias = alias->get_alias_target ();
              alias->remove ();
@@ -318,33 +480,38 @@ inline_call (struct cgraph_edge *e, bool update_original,
        }
     }
 
-  clone_inlined_nodes (e, true, update_original, overall_size, e->frequency);
+  clone_inlined_nodes (e, true, update_original, overall_size);
 
-  gcc_assert (curr->callee->global.inlined_to == to);
+  gcc_assert (curr->callee->inlined_to == to);
 
-  old_size = inline_summaries->get (to)->size;
-  inline_merge_summary (e);
-  if (optimize)
+  old_size = ipa_size_summaries->get (to)->size;
+  ipa_merge_fn_summary_after_inlining (e);
+  if (e->in_polymorphic_cdtor)
+    mark_all_inlined_calls_cdtor (e->callee);
+  if (opt_for_fn (e->caller->decl, optimize))
     new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
+  bool removed_p = check_speculations (e->callee, new_edges);
   if (update_overall_summary)
-   inline_update_overall_summary (to);
-  new_size = inline_summaries->get (to)->size;
+    ipa_update_overall_fn_summary (to, new_edges_found || removed_p);
+  else
+    /* Update self size by the estimate so overall function growth limits
+       work for further inlining into this function.  Before inlining
+       the function we inlined to again we expect the caller to update
+       the overall summary.  */
+    ipa_size_summaries->get (to)->size += estimated_growth;
+  new_size = ipa_size_summaries->get (to)->size;
 
   if (callee->calls_comdat_local)
     to->calls_comdat_local = true;
-  else if (to->calls_comdat_local && callee->comdat_local_p ())
-    {
-      struct cgraph_edge *se = to->callees;
-      for (; se; se = se->next_callee)
-       if (se->inline_failed && se->callee->comdat_local_p ())
-         break;
-      if (se == NULL)
-       to->calls_comdat_local = false;
-    }
+  else if (to->calls_comdat_local && comdat_local)
+    to->calls_comdat_local = to->check_calls_comdat_local_p ();
 
-#ifdef ENABLE_CHECKING
+  /* FIXME: This assert suffers from roundoff errors, disable it for GCC 5
+     and revisit it after conversion to sreals in GCC 6.
+     See PR 65654.  */
+#if 0
   /* Verify that estimated growth match real growth.  Allow off-by-one
-     error due to INLINE_SIZE_SCALE roudoff errors.  */
+     error due to ipa_fn_summary::size_scale roundoff errors.  */
   gcc_assert (!update_overall_summary || !overall_size || new_edges_found
              || abs (estimated_growth - (new_size - old_size)) <= 1
              || speculation_removed
@@ -355,16 +522,20 @@ inline_call (struct cgraph_edge *e, bool update_original,
 
   /* Account the change of overall unit size; external functions will be
      removed and are thus not accounted.  */
-  if (overall_size
-      && !DECL_EXTERNAL (to->decl))
+  if (overall_size && inline_account_function_p (to))
     *overall_size += new_size - old_size;
   ncalls_inlined++;
 
-  /* This must happen after inline_merge_summary that rely on jump
+  /* This must happen after ipa_merge_fn_summary_after_inlining that relies on jump
      functions of callee to not be updated.  */
   return new_edges_found;
 }
 
+/* For each node that was made the holder of function body by
+   save_inline_function_body, this summary contains pointer to the previous
+   holder of the body.  */
+
+function_summary <tree *> *ipa_saved_clone_sources;
 
 /* Copy function body of NODE and redirect all inline clones to it.
    This is done before inline plan is applied to NODE when there are
@@ -380,12 +551,28 @@ save_inline_function_body (struct cgraph_node *node)
 
   if (dump_file)
     fprintf (dump_file, "\nSaving body of %s for later reuse\n",
-            node->name ());
+            node->dump_name ());
  
   gcc_assert (node == cgraph_node::get (node->decl));
 
   /* first_clone will be turned into real function.  */
   first_clone = node->clones;
+
+  /* Arrange first clone to not be thunk as those do not have bodies.  */
+  if (first_clone->thunk.thunk_p)
+    {
+      while (first_clone->thunk.thunk_p)
+        first_clone = first_clone->next_sibling_clone;
+      first_clone->prev_sibling_clone->next_sibling_clone
+       = first_clone->next_sibling_clone;
+      if (first_clone->next_sibling_clone)
+       first_clone->next_sibling_clone->prev_sibling_clone
+          = first_clone->prev_sibling_clone;
+      first_clone->next_sibling_clone = node->clones;
+      first_clone->prev_sibling_clone = NULL;
+      node->clones->prev_sibling_clone = first_clone;
+      node->clones = first_clone;
+    }
   first_clone->decl = copy_node (node->decl);
   first_clone->decl->decl_with_vis.symtab_node = first_clone;
   gcc_assert (first_clone == cgraph_node::get (first_clone->decl));
@@ -394,7 +581,8 @@ save_inline_function_body (struct cgraph_node *node)
      first_clone.  */
   if (first_clone->next_sibling_clone)
     {
-      for (n = first_clone->next_sibling_clone; n->next_sibling_clone; n = n->next_sibling_clone)
+      for (n = first_clone->next_sibling_clone; n->next_sibling_clone;
+          n = n->next_sibling_clone)
         n->clone_of = first_clone;
       n->clone_of = first_clone;
       n->next_sibling_clone = first_clone->clones;
@@ -405,6 +593,22 @@ save_inline_function_body (struct cgraph_node *node)
       first_clone->next_sibling_clone = NULL;
       gcc_assert (!first_clone->prev_sibling_clone);
     }
+
+  tree prev_body_holder = node->decl;
+  if (!ipa_saved_clone_sources)
+    ipa_saved_clone_sources = new function_summary <tree *> (symtab);
+  else
+    {
+      tree *p = ipa_saved_clone_sources->get (node);
+      if (p)
+       {
+         prev_body_holder = *p;
+         gcc_assert (prev_body_holder);
+       }
+    }
+  *ipa_saved_clone_sources->get_create (first_clone) = prev_body_holder;
+  first_clone->former_clone_of
+    = node->former_clone_of ? node->former_clone_of : node->decl;
   first_clone->clone_of = NULL;
 
   /* Now node in question has no clones.  */
@@ -433,8 +637,7 @@ save_inline_function_body (struct cgraph_node *node)
 
   /* Copy the OLD_VERSION_NODE function tree to the new version.  */
   tree_function_versioning (node->decl, first_clone->decl,
-                           NULL, true, NULL, false,
-                           NULL, NULL);
+                           NULL, NULL, true, NULL, NULL);
 
   /* The function will be short lived and removed after we inline all the clones,
      but make it internal so we won't confuse ourself.  */
@@ -446,17 +649,16 @@ save_inline_function_body (struct cgraph_node *node)
   /* When doing recursive inlining, the clone may become unnecessary.
      This is possible i.e. in the case when the recursive function is proved to be
      non-throwing and the recursion happens only in the EH landing pad.
-     We can not remove the clone until we are done with saving the body.
+     We cannot remove the clone until we are done with saving the body.
      Remove it now.  */
   if (!first_clone->callers)
     {
       first_clone->remove_symbol_and_inline_clones ();
       first_clone = NULL;
     }
-#ifdef ENABLE_CHECKING
-  else
+  else if (flag_checking)
     first_clone->verify ();
-#endif
+
   return first_clone;
 }
 
@@ -468,9 +670,10 @@ preserve_function_body_p (struct cgraph_node *node)
   gcc_assert (symtab->global_info_ready);
   gcc_assert (!node->alias && !node->thunk.thunk_p);
 
-  /* Look if there is any clone around.  */
-  if (node->clones)
-    return true;
+  /* Look if there is any non-thunk clone around.  */
+  for (node = node->clones; node; node = node->next_sibling_clone)
+    if (!node->thunk.thunk_p)
+      return true;
   return false;
 }
 
@@ -493,18 +696,45 @@ inline_transform (struct cgraph_node *node)
   if (preserve_function_body_p (node))
     save_inline_function_body (node);
 
+  profile_count num = node->count;
+  profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+  bool scale = num.initialized_p () && !(num == den);
+  if (scale)
+    {
+      profile_count::adjust_for_ipa_scaling (&num, &den);
+      if (dump_file)
+       {
+         fprintf (dump_file, "Applying count scale ");
+         num.dump (dump_file);
+         fprintf (dump_file, "/");
+         den.dump (dump_file);
+         fprintf (dump_file, "\n");
+       }
+
+      basic_block bb;
+      cfun->cfg->count_max = profile_count::uninitialized ();
+      FOR_ALL_BB_FN (bb, cfun)
+       {
+         bb->count = bb->count.apply_scale (num, den);
+         cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
+       }
+      ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
+    }
+
   for (e = node->callees; e; e = next)
     {
       if (!e->inline_failed)
        has_inline = true;
       next = e->next_callee;
-      e->redirect_call_stmt_to_callee ();
+      cgraph_edge::redirect_call_stmt_to_callee (e);
     }
   node->remove_all_references ();
 
   timevar_push (TV_INTEGRATION);
-  if (node->callees && (optimize || has_inline))
-    todo = optimize_inline_calls (current_function_decl);
+  if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
+    {
+      todo = optimize_inline_calls (current_function_decl);
+    }
   timevar_pop (TV_INTEGRATION);
 
   cfun->always_inline_functions_inlined = true;