1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently, the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns a profile-id to each function and provides us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
58 #include "fold-const.h"
60 #include "dominance.h"
62 #include "basic-block.h"
65 #include "plugin-api.h"
66 #include "hard-reg-set.h"
71 #include "tree-pass.h"
72 #include "tree-ssa-alias.h"
73 #include "internal-fn.h"
74 #include "gimple-expr.h"
76 #include "gimple-iterator.h"
79 #include "tree-iterator.h"
80 #include "ipa-utils.h"
83 #include "value-prof.h"
84 #include "alloc-pool.h"
85 #include "tree-inline.h"
86 #include "lto-streamer.h"
87 #include "data-streamer.h"
88 #include "symbol-summary.h"
90 #include "ipa-inline.h"
92 /* Entry in the histogram. Elsewhere in this file an entry is keyed by
   its COUNT field and accumulates TIME and SIZE for that count. */
94 struct histogram_entry
101 /* Histogram of profile values.
102 The histogram is represented as an ordered vector of entries allocated via
103 histogram_pool. During construction a separate hashtable is kept to look up
104 duplicate entries. */
106 vec
<histogram_entry
*> histogram
;
/* Pool from which account_time_size allocates the entries it pushes onto
   HISTOGRAM. */
107 static pool_allocator
<histogram_entry
> histogram_pool
108 ("IPA histogram", 10);
110 /* Hash traits for histogram_entry pointers. They parameterize the hash_table
   that account_time_size uses to find an already existing entry; two
   entries compare equal when their COUNT fields match. */
112 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
114 typedef histogram_entry
*value_type
;
115 typedef histogram_entry
*compare_type
;
116 static inline hashval_t
hash (const histogram_entry
*);
117 static inline int equal (const histogram_entry
*, const histogram_entry
*);
/* Hash function of the histogram_hash traits: compute the hash value of
   entry VAL. */
121 histogram_hash::hash (const histogram_entry
*val
)
/* Equality function of the histogram_hash traits: return nonzero when
   VAL and VAL2 have the same COUNT. TIME and SIZE are not compared. */
127 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
129 return val
->count
== val2
->count
;
132 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
133 HASHTABLE is the side hash table kept to avoid duplicate entries for the
   same COUNT. */
136 account_time_size (hash_table
<histogram_hash
> *hashtable
,
137 vec
<histogram_entry
*> &histogram
,
138 gcov_type count
, int time
, int size
)
/* Temporary key carrying only COUNT; time and size start at zero. */
140 histogram_entry key
= {count
, 0, 0};
/* Find the slot for COUNT, creating it if needed (INSERT). */
141 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
/* Allocate an entry from histogram_pool for the slot and append it to
   HISTOGRAM. */
145 *val
= histogram_pool
.allocate ();
147 histogram
.safe_push (*val
);
/* Accumulate TIME and SIZE into the entry for COUNT. */
149 (*val
)->time
+= time
;
150 (*val
)->size
+= size
;
/* Comparison callback handed to histogram.qsort. V1 and V2 point to
   elements of the histogram vector, i.e. to histogram_entry pointers;
   the entries are ordered by their COUNT field. */
154 cmp_counts (const void *v1
, const void *v2
)
156 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
157 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
158 if (h1
->count
< h2
->count
)
160 if (h1
->count
> h2
->count
)
165 /* Dump HISTOGRAM to FILE. One line is printed per entry, showing its count
   together with the running totals of time (weighted by count) and size
   as percentages of the overall totals. */
168 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
171 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
/* NOTE(review): this header line is written to dump_file while the
   per-entry lines below go to FILE; confirm whether FILE was intended. */
173 fprintf (dump_file
, "Histogram:\n");
/* First pass: compute the overall totals used as percentage bases. */
174 for (i
= 0; i
< histogram
.length (); i
++)
176 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
177 overall_size
+= histogram
[i
]->size
;
/* Second pass: print each entry with the cumulated percentages. */
183 for (i
= 0; i
< histogram
.length (); i
++)
185 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
186 cumulated_size
+= histogram
[i
]->size
;
187 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
188 (int64_t) histogram
[i
]->count
,
190 cumulated_time
* 100.0 / overall_time
,
192 cumulated_size
* 100.0 / overall_size
);
196 /* Collect histogram from CFG profiles. For every basic block of every
   function with a gimple body, the estimated time and size of its
   statements are accounted under the block's count. For indirect calls
   that carry an HIST_TYPE_INDIR_CALL histogram, the common target and its
   probability are recorded in the call graph edge's indirect_info.
   Finally the histogram is sorted with cmp_counts. */
199 ipa_profile_generate_summary (void)
201 struct cgraph_node
*node
;
202 gimple_stmt_iterator gsi
;
/* Side hash table used by account_time_size to merge equal counts. */
205 hash_table
<histogram_hash
> hashtable (10);
207 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
208 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
212 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
214 gimple stmt
= gsi_stmt (gsi
);
/* Only calls without a known fndecl are looked up in the value
   histograms. */
215 if (gimple_code (stmt
) == GIMPLE_CALL
216 && !gimple_call_fndecl (stmt
))
219 h
= gimple_histogram_value_of_type
220 (DECL_STRUCT_FUNCTION (node
->decl
),
221 stmt
, HIST_TYPE_INDIR_CALL
);
222 /* No need to do sanity check: gimple_ic_transform already
223 takes away bad histograms. */
226 /* counter 0 is target, counter 1 is number of executions in which we called
227 target, counter 2 is total number of executions. */
228 if (h
->hvalue
.counters
[2])
230 struct cgraph_edge
* e
= node
->get_edge (stmt
);
231 if (e
&& !e
->indirect_unknown_callee
)
233 e
->indirect_info
->common_target_id
234 = h
->hvalue
.counters
[0];
235 e
->indirect_info
->common_target_probability
236 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
/* Clamp the probability to REG_BR_PROB_BASE. */
237 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
240 fprintf (dump_file
, "Probability capped to 1\n");
241 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
244 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
/* Sum the estimated time and size of the statement. */
248 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
249 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
/* Account the accumulated time and size under the block's count. */
251 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
253 histogram
.qsort (cmp_counts
);
256 /* Serialize the ipa info for lto. The histogram is written to the
   LTO_section_ipa_profile section as the number of entries followed by
   the (count, time, size) triple of each entry; ipa_profile_read_summary
   reads the values back in the same order. */
259 ipa_profile_write_summary (void)
261 struct lto_simple_output_block
*ob
262 = lto_create_simple_output_block (LTO_section_ipa_profile
);
/* Number of entries first, so the reader knows how many triples follow. */
265 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
266 for (i
= 0; i
< histogram
.length (); i
++)
268 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
269 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
270 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
272 lto_destroy_simple_output_block (ob
);
275 /* Deserialize the ipa info for lto. The LTO_section_ipa_profile section of
   every file returned by lto_get_file_decl_data is read and its entries
   are merged into the global histogram through account_time_size; the
   histogram is then sorted with cmp_counts. */
278 ipa_profile_read_summary (void)
280 struct lto_file_decl_data
** file_data_vec
281 = lto_get_file_decl_data ();
282 struct lto_file_decl_data
* file_data
;
/* Side hash table shared across all files so that equal counts coming
   from different files end up in one entry. */
285 hash_table
<histogram_hash
> hashtable (10);
/* The file vector is walked until a null element is found. */
287 while ((file_data
= file_data_vec
[j
++]))
291 struct lto_input_block
*ib
292 = lto_create_simple_input_block (file_data
,
293 LTO_section_ipa_profile
,
/* Number of (count, time, size) triples that follow. */
297 unsigned int num
= streamer_read_uhwi (ib
);
299 for (n
= 0; n
< num
; n
++)
301 gcov_type count
= streamer_read_gcov_count (ib
);
302 int time
= streamer_read_uhwi (ib
);
303 int size
= streamer_read_uhwi (ib
);
304 account_time_size (&hashtable
, histogram
,
307 lto_destroy_simple_input_block (file_data
,
308 LTO_section_ipa_profile
,
312 histogram
.qsort (cmp_counts
);
315 /* Data used by ipa_propagate_frequency. ipa_propagate_frequency initializes
   all flags to true and the worker ipa_propagate_frequency_1 clears the
   ones that a caller disproves. */
317 struct ipa_propagate_frequency_data
/* The function whose frequency is being computed. */
319 cgraph_node
*function_symbol
;
/* Still possible that the function is unlikely executed. */
320 bool maybe_unlikely_executed
;
/* Still possible that the function is executed only once. */
321 bool maybe_executed_once
;
/* All callers seen so far are only called at startup. */
322 bool only_called_at_startup
;
/* All callers seen so far are only called at exit. */
323 bool only_called_at_exit
;
326 /* Worker for ipa_propagate_frequency, which invokes it through
   call_for_symbol_and_aliases. Walk the callers of NODE and clear in
   DATA (a struct ipa_propagate_frequency_data) every flag that some
   caller disproves. */
329 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
331 struct ipa_propagate_frequency_data
*d
;
332 struct cgraph_edge
*edge
;
334 d
= (struct ipa_propagate_frequency_data
*)data
;
/* The walk stops early once every flag has been cleared, since nothing
   more can be learned from the remaining callers. */
335 for (edge
= node
->callers
;
336 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
337 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
338 edge
= edge
->next_caller
)
/* The startup/exit update is guarded by the caller being different from
   the function under analysis. */
340 if (edge
->caller
!= d
->function_symbol
)
342 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
343 /* It makes sense to put main() together with the static constructors.
344 It will be executed for sure, but rest of functions called from
345 main are definitely not at startup only. */
346 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
347 d
->only_called_at_startup
= 0;
348 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
351 /* When profile feedback is available, do not try to propagate too hard;
352 counts are already good guide on function frequencies and roundoff
353 errors can make us push the function into unlikely section even when
354 it is executed by the train run. Transfer the function only if all
355 callers are unlikely executed. */
357 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
358 /* Thunks are not profiled. This is more or less implementation
360 && !d
->function_symbol
->thunk
.thunk_p
361 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
362 || (edge
->caller
->global
.inlined_to
363 && edge
->caller
->global
.inlined_to
->frequency
364 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
365 d
->maybe_unlikely_executed
= false;
366 if (!edge
->frequency
)
368 switch (edge
->caller
->frequency
)
370 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
372 case NODE_FREQUENCY_EXECUTED_ONCE
:
373 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
374 fprintf (dump_file
, " Called by %s that is executed once\n",
375 edge
->caller
->name ());
376 d
->maybe_unlikely_executed
= false;
377 if (inline_edge_summary (edge
)->loop_depth
)
379 d
->maybe_executed_once
= false;
380 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
381 fprintf (dump_file
, " Called in loop\n");
384 case NODE_FREQUENCY_HOT
:
385 case NODE_FREQUENCY_NORMAL
:
386 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
387 fprintf (dump_file
, " Called by %s that is normal or hot\n",
388 edge
->caller
->name ());
389 d
->maybe_unlikely_executed
= false;
390 d
->maybe_executed_once
= false;
397 /* Return true if NODE contains hot calls. Both the direct call edges
   (NODE->callees) and the indirect ones (NODE->indirect_calls) are
   tested with maybe_hot_p. For a direct edge whose inline_failed is
   unset, the callee is searched recursively as well. */
400 contains_hot_call_p (struct cgraph_node
*node
)
402 struct cgraph_edge
*e
;
403 for (e
= node
->callees
; e
; e
= e
->next_callee
)
404 if (e
->maybe_hot_p ())
406 else if (!e
->inline_failed
407 && contains_hot_call_p (e
->callee
))
409 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
410 if (e
->maybe_hot_p ())
/* ipa_propagate_frequency: gather information from the callers of NODE
   (and of its aliases) with ipa_propagate_frequency_1, then update
   NODE's only_called_at_startup, only_called_at_exit and frequency
   fields accordingly. The caller in this file uses the result as a
   condition for scheduling the callees for another round. */
415 /* See if the frequency of NODE can be updated based on frequencies of its
418 ipa_propagate_frequency (struct cgraph_node
*node
)
420 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
421 bool changed
= false;
423 /* We can not propagate anything useful about externally visible functions
424 nor about virtuals. */
425 if (!node
->local
.local
427 || (opt_for_fn (node
->decl
, flag_devirtualize
)
428 && DECL_VIRTUAL_P (node
->decl
)))
430 gcc_assert (node
->analyzed
);
431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
432 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
/* Let the worker inspect the callers and clear the flags in D. */
434 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
/* Startup and exit are mutually exclusive here: a flag is set on NODE
   only when the other one was disproved. */
437 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
438 && !node
->only_called_at_startup
)
440 node
->only_called_at_startup
= true;
442 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
446 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
447 && !node
->only_called_at_exit
)
449 node
->only_called_at_exit
= true;
451 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
456 /* With profile we can decide on hot/normal based on count. */
460 if (node
->count
>= get_hot_bb_threshold ())
/* A function is also treated as hot when it contains a hot call. */
463 hot
|= contains_hot_call_p (node
);
466 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
469 fprintf (dump_file
, "Node %s promoted to hot.\n",
471 node
->frequency
= NODE_FREQUENCY_HOT
;
476 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
479 fprintf (dump_file
, "Node %s reduced to normal.\n",
481 node
->frequency
= NODE_FREQUENCY_NORMAL
;
485 /* These come either from profile or user hints; never update them. */
486 if (node
->frequency
== NODE_FREQUENCY_HOT
487 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
/* Otherwise apply what the callers allowed: unlikely executed takes
   precedence over executed once. */
489 if (d
.maybe_unlikely_executed
)
491 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
493 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
497 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
499 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
501 fprintf (dump_file
, "Node %s promoted to executed once.\n",
508 /* Simple ipa profile pass propagating frequencies across the callgraph. */
513 struct cgraph_node
**order
;
514 struct cgraph_edge
*e
;
516 bool something_changed
= false;
518 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
519 struct cgraph_node
*n
,*n2
;
520 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
521 int nmismatch
= 0, nimpossible
= 0;
522 bool node_map_initialized
= false;
525 dump_histogram (dump_file
, histogram
);
526 for (i
= 0; i
< (int)histogram
.length (); i
++)
528 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
529 overall_size
+= histogram
[i
]->size
;
535 gcc_assert (overall_size
);
538 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
540 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
541 (int64_t)overall_time
);
542 min
= get_hot_bb_threshold ();
543 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
546 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
547 cumulated_size
+= histogram
[i
]->size
;
549 fprintf (dump_file
, "GCOV min count: %" PRId64
550 " Time:%3.2f%% Size:%3.2f%%\n",
552 cumulated_time
* 100.0 / overall_time
,
553 cumulated_size
* 100.0 / overall_size
);
555 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
557 for (i
= 0; cumulated
< cutoff
; i
++)
559 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
560 threshold
= histogram
[i
]->count
;
566 gcov_type cumulated_time
= 0, cumulated_size
= 0;
569 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
572 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
573 cumulated_size
+= histogram
[i
]->size
;
575 fprintf (dump_file
, "Determined min count: %" PRId64
576 " Time:%3.2f%% Size:%3.2f%%\n",
578 cumulated_time
* 100.0 / overall_time
,
579 cumulated_size
* 100.0 / overall_size
);
581 if (threshold
> get_hot_bb_threshold ()
585 fprintf (dump_file
, "Threshold updated.\n");
586 set_hot_bb_threshold (threshold
);
589 histogram
.release ();
590 histogram_pool
.release ();
592 /* Produce speculative calls: we saved the common target from profiling into
593 e->indirect_info->common_target_id. Now, at link time, we can look up the
594 corresponding function node and produce a speculative call. */
596 FOR_EACH_DEFINED_FUNCTION (n
)
600 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
603 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
607 if (e
->indirect_info
->common_target_id
)
609 if (!node_map_initialized
)
610 init_node_map (false);
611 node_map_initialized
= true;
613 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
618 fprintf (dump_file
, "Indirect call -> direct call from"
619 " other module %s/%i => %s/%i, prob %3.2f\n",
620 xstrdup_for_dump (n
->name ()), n
->order
,
621 xstrdup_for_dump (n2
->name ()), n2
->order
,
622 e
->indirect_info
->common_target_probability
623 / (float)REG_BR_PROB_BASE
);
625 if (e
->indirect_info
->common_target_probability
626 < REG_BR_PROB_BASE
/ 2)
631 "Not speculating: probability is too low.\n");
633 else if (!e
->maybe_hot_p ())
638 "Not speculating: call is cold.\n");
640 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
641 && n2
->can_be_discarded_p ())
646 "Not speculating: target is overwritable "
647 "and can be discarded.\n");
649 else if (ipa_node_params_sum
&& ipa_edge_args_vector
650 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
651 && ipa_get_param_count (IPA_NODE_REF (n2
))
652 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
653 && (ipa_get_param_count (IPA_NODE_REF (n2
))
654 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
655 || !stdarg_p (TREE_TYPE (n2
->decl
))))
661 "parameter count mistmatch\n");
663 else if (e
->indirect_info
->polymorphic
664 && !opt_for_fn (n
->decl
, flag_devirtualize
)
665 && !possible_polymorphic_call_target_p (e
, n2
))
671 "function is not in the polymorphic "
672 "call target list\n");
676 /* Target may be overwritable, but profile says that
677 control flow goes to this particular implementation
678 of N2. Speculate on the local alias to allow inlining.
680 if (!n2
->can_be_discarded_p ())
683 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
690 apply_scale (e
->count
,
691 e
->indirect_info
->common_target_probability
),
692 apply_scale (e
->frequency
,
693 e
->indirect_info
->common_target_probability
));
700 fprintf (dump_file
, "Function with profile-id %i not found.\n",
701 e
->indirect_info
->common_target_id
);
707 inline_update_overall_summary (n
);
709 if (node_map_initialized
)
711 if (dump_file
&& nindirect
)
713 "%i indirect calls trained.\n"
714 "%i (%3.2f%%) have common target.\n"
715 "%i (%3.2f%%) targets was not found.\n"
716 "%i (%3.2f%%) targets had parameter count mismatch.\n"
717 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
718 "%i (%3.2f%%) speculations seems useless.\n"
719 "%i (%3.2f%%) speculations produced.\n",
721 ncommon
, ncommon
* 100.0 / nindirect
,
722 nunknown
, nunknown
* 100.0 / nindirect
,
723 nmismatch
, nmismatch
* 100.0 / nindirect
,
724 nimpossible
, nimpossible
* 100.0 / nindirect
,
725 nuseless
, nuseless
* 100.0 / nindirect
,
726 nconverted
, nconverted
* 100.0 / nindirect
);
728 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
729 order_pos
= ipa_reverse_postorder (order
);
730 for (i
= order_pos
- 1; i
>= 0; i
--)
732 if (order
[i
]->local
.local
733 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
734 && ipa_propagate_frequency (order
[i
]))
736 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
737 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
739 something_changed
= true;
740 e
->callee
->aux
= (void *)1;
743 order
[i
]->aux
= NULL
;
746 while (something_changed
)
748 something_changed
= false;
749 for (i
= order_pos
- 1; i
>= 0; i
--)
752 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
753 && ipa_propagate_frequency (order
[i
]))
755 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
756 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
758 something_changed
= true;
759 e
->callee
->aux
= (void *)1;
762 order
[i
]->aux
= NULL
;
/* Static description of the ipa-profile pass; pass_ipa_profile hands it
   to the ipa_opt_pass_d constructor. */
771 const pass_data pass_data_ipa_profile
=
774 "profile_estimate", /* name */
775 OPTGROUP_NONE
, /* optinfo_flags */
776 TV_IPA_PROFILE
, /* tv_id */
777 0, /* properties_required */
778 0, /* properties_provided */
779 0, /* properties_destroyed */
780 0, /* todo_flags_start */
781 0, /* todo_flags_finish */
/* The ipa-profile pass object. It wires the summary generation, write
   and read hooks of this file into ipa_opt_pass_d and runs ipa_profile
   when executed. */
784 class pass_ipa_profile
: public ipa_opt_pass_d
787 pass_ipa_profile (gcc::context
*ctxt
)
788 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
789 ipa_profile_generate_summary
, /* generate_summary */
790 ipa_profile_write_summary
, /* write_summary */
791 ipa_profile_read_summary
, /* read_summary */
792 NULL
, /* write_optimization_summary */
793 NULL
, /* read_optimization_summary */
794 NULL
, /* stmt_fixup */
795 0, /* function_transform_todo_flags_start */
796 NULL
, /* function_transform */
797 NULL
) /* variable_transform */
800 /* opt_pass methods: */
/* The pass runs when flag_ipa_profile is set or when in_lto_p is set. */
801 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
802 virtual unsigned int execute (function
*) { return ipa_profile (); }
804 }; // class pass_ipa_profile
/* Factory for the ipa-profile pass: return a newly allocated
   pass_ipa_profile bound to the compiler context CTXT. */
809 make_pass_ipa_profile (gcc::context
*ctxt
)
811 return new pass_ipa_profile (ctxt
);