]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-profile.c
gcc/
[thirdparty/gcc.git] / gcc / ipa-profile.c
CommitLineData
08f835dc 1/* Basic IPA optimizations based on profile.
5624e564 2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
08f835dc
JH
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
daf5c770
JH
/* ipa-profile pass implements the following analysis propagating profile
   inter-procedurally.

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  This histogram is complete only with LTO,
     otherwise it contains information only about the current unit.

     Similar histogram is also estimated by coverage runtime.  This histogram
     is not dependent on LTO, but it suffers from various defects; first
     gcov runtime is not weighting individual basic block by estimated
     execution time and second the merging of multiple runs makes assumption
     that the histogram distribution did not change.  Consequently histogram
     constructed here may be more precise.

     The information is used to set hot/cold thresholds.
   - Next speculative indirect call resolution is performed:  the local
     profile pass assigns profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization based on
     decision of inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance threshold and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
08f835dc
JH
47#include "config.h"
48#include "system.h"
49#include "coretypes.h"
50#include "tm.h"
40e23961
MC
51#include "alias.h"
52#include "symtab.h"
4d648807 53#include "tree.h"
40e23961 54#include "fold-const.h"
60393bbc
AM
55#include "predict.h"
56#include "dominance.h"
57#include "cfg.h"
58#include "basic-block.h"
c582198b 59#include "plugin-api.h"
c582198b 60#include "hard-reg-set.h"
c582198b
AM
61#include "function.h"
62#include "ipa-ref.h"
08f835dc
JH
63#include "cgraph.h"
64#include "tree-pass.h"
2fb9a547
AM
65#include "tree-ssa-alias.h"
66#include "internal-fn.h"
67#include "gimple-expr.h"
08f835dc 68#include "gimple.h"
5be5c238 69#include "gimple-iterator.h"
08f835dc
JH
70#include "flags.h"
71#include "target.h"
72#include "tree-iterator.h"
73#include "ipa-utils.h"
08f835dc
JH
74#include "profile.h"
75#include "params.h"
76#include "value-prof.h"
77#include "alloc-pool.h"
78#include "tree-inline.h"
79#include "lto-streamer.h"
80#include "data-streamer.h"
dd912cb8 81#include "symbol-summary.h"
c582198b 82#include "ipa-prop.h"
08f835dc
JH
83#include "ipa-inline.h"
84
85/* Entry in the histogram. */
86
87struct histogram_entry
88{
89 gcov_type count;
90 int time;
91 int size;
92};
93
94/* Histogram of profile values.
95 The histogram is represented as an ordered vector of entries allocated via
96 histogram_pool. During construction a separate hashtable is kept to lookup
97 duplicate entries. */
98
99vec<histogram_entry *> histogram;
d7809518
ML
100static pool_allocator<histogram_entry> histogram_pool
101 ("IPA histogram", 10);
08f835dc
JH
102
103/* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
104
105struct histogram_hash : typed_noop_remove <histogram_entry>
106{
67f58944
TS
107 typedef histogram_entry *value_type;
108 typedef histogram_entry *compare_type;
109 static inline hashval_t hash (const histogram_entry *);
110 static inline int equal (const histogram_entry *, const histogram_entry *);
08f835dc
JH
111};
112
113inline hashval_t
114histogram_hash::hash (const histogram_entry *val)
115{
116 return val->count;
117}
118
119inline int
120histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
121{
122 return val->count == val2->count;
123}
124
125/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
126 HASHTABLE is the on-side hash kept to avoid duplicates. */
127
128static void
c203e8a7 129account_time_size (hash_table<histogram_hash> *hashtable,
08f835dc
JH
130 vec<histogram_entry *> &histogram,
131 gcov_type count, int time, int size)
132{
133 histogram_entry key = {count, 0, 0};
c203e8a7 134 histogram_entry **val = hashtable->find_slot (&key, INSERT);
08f835dc
JH
135
136 if (!*val)
137 {
d7809518 138 *val = histogram_pool.allocate ();
08f835dc
JH
139 **val = key;
140 histogram.safe_push (*val);
141 }
142 (*val)->time += time;
143 (*val)->size += size;
144}
145
146int
147cmp_counts (const void *v1, const void *v2)
148{
149 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
150 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
151 if (h1->count < h2->count)
152 return 1;
153 if (h1->count > h2->count)
154 return -1;
155 return 0;
156}
157
158/* Dump HISTOGRAM to FILE. */
159
160static void
161dump_histogram (FILE *file, vec<histogram_entry *> histogram)
162{
163 unsigned int i;
164 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
165
166 fprintf (dump_file, "Histogram:\n");
167 for (i = 0; i < histogram.length (); i++)
168 {
169 overall_time += histogram[i]->count * histogram[i]->time;
170 overall_size += histogram[i]->size;
171 }
172 if (!overall_time)
173 overall_time = 1;
174 if (!overall_size)
175 overall_size = 1;
176 for (i = 0; i < histogram.length (); i++)
177 {
178 cumulated_time += histogram[i]->count * histogram[i]->time;
179 cumulated_size += histogram[i]->size;
16998094 180 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
a9243bfc 181 (int64_t) histogram[i]->count,
08f835dc
JH
182 histogram[i]->time,
183 cumulated_time * 100.0 / overall_time,
184 histogram[i]->size,
185 cumulated_size * 100.0 / overall_size);
186 }
187}
188
189/* Collect histogram from CFG profiles. */
190
191static void
192ipa_profile_generate_summary (void)
193{
194 struct cgraph_node *node;
195 gimple_stmt_iterator gsi;
08f835dc
JH
196 basic_block bb;
197
c203e8a7 198 hash_table<histogram_hash> hashtable (10);
08f835dc
JH
199
200 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
67348ccc 201 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
08f835dc
JH
202 {
203 int time = 0;
204 int size = 0;
205 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
206 {
207 gimple stmt = gsi_stmt (gsi);
208 if (gimple_code (stmt) == GIMPLE_CALL
209 && !gimple_call_fndecl (stmt))
210 {
211 histogram_value h;
212 h = gimple_histogram_value_of_type
67348ccc 213 (DECL_STRUCT_FUNCTION (node->decl),
08f835dc
JH
214 stmt, HIST_TYPE_INDIR_CALL);
215 /* No need to do sanity check: gimple_ic_transform already
216 takes away bad histograms. */
217 if (h)
218 {
219 /* counter 0 is target, counter 1 is number of execution we called target,
220 counter 2 is total number of executions. */
221 if (h->hvalue.counters[2])
222 {
d52f5295 223 struct cgraph_edge * e = node->get_edge (stmt);
fd3c9a7e
JH
224 if (e && !e->indirect_unknown_callee)
225 continue;
08f835dc
JH
226 e->indirect_info->common_target_id
227 = h->hvalue.counters [0];
228 e->indirect_info->common_target_probability
229 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
230 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
231 {
232 if (dump_file)
233 fprintf (dump_file, "Probability capped to 1\n");
234 e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
235 }
236 }
67348ccc 237 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
08f835dc
JH
238 stmt, h);
239 }
240 }
241 time += estimate_num_insns (stmt, &eni_time_weights);
242 size += estimate_num_insns (stmt, &eni_size_weights);
243 }
c203e8a7 244 account_time_size (&hashtable, histogram, bb->count, time, size);
08f835dc 245 }
08f835dc
JH
246 histogram.qsort (cmp_counts);
247}
248
249/* Serialize the ipa info for lto. */
250
251static void
252ipa_profile_write_summary (void)
253{
254 struct lto_simple_output_block *ob
255 = lto_create_simple_output_block (LTO_section_ipa_profile);
256 unsigned int i;
257
c3284718 258 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
08f835dc
JH
259 for (i = 0; i < histogram.length (); i++)
260 {
261 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
262 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
263 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
264 }
265 lto_destroy_simple_output_block (ob);
266}
267
268/* Deserialize the ipa info for lto. */
269
270static void
271ipa_profile_read_summary (void)
272{
273 struct lto_file_decl_data ** file_data_vec
274 = lto_get_file_decl_data ();
275 struct lto_file_decl_data * file_data;
08f835dc
JH
276 int j = 0;
277
c203e8a7 278 hash_table<histogram_hash> hashtable (10);
08f835dc
JH
279
280 while ((file_data = file_data_vec[j++]))
281 {
282 const char *data;
283 size_t len;
284 struct lto_input_block *ib
285 = lto_create_simple_input_block (file_data,
286 LTO_section_ipa_profile,
287 &data, &len);
288 if (ib)
289 {
290 unsigned int num = streamer_read_uhwi (ib);
291 unsigned int n;
292 for (n = 0; n < num; n++)
293 {
294 gcov_type count = streamer_read_gcov_count (ib);
295 int time = streamer_read_uhwi (ib);
296 int size = streamer_read_uhwi (ib);
c203e8a7 297 account_time_size (&hashtable, histogram,
08f835dc
JH
298 count, time, size);
299 }
300 lto_destroy_simple_input_block (file_data,
301 LTO_section_ipa_profile,
302 ib, data, len);
303 }
304 }
08f835dc
JH
305 histogram.qsort (cmp_counts);
306}
307
308/* Data used by ipa_propagate_frequency. */
309
310struct ipa_propagate_frequency_data
311{
1ede94c5 312 cgraph_node *function_symbol;
08f835dc
JH
313 bool maybe_unlikely_executed;
314 bool maybe_executed_once;
315 bool only_called_at_startup;
316 bool only_called_at_exit;
317};
318
319/* Worker for ipa_propagate_frequency_1. */
320
321static bool
322ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
323{
324 struct ipa_propagate_frequency_data *d;
325 struct cgraph_edge *edge;
326
327 d = (struct ipa_propagate_frequency_data *)data;
328 for (edge = node->callers;
329 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
330 || d->only_called_at_startup || d->only_called_at_exit);
331 edge = edge->next_caller)
332 {
1ede94c5 333 if (edge->caller != d->function_symbol)
08f835dc
JH
334 {
335 d->only_called_at_startup &= edge->caller->only_called_at_startup;
336 /* It makes sense to put main() together with the static constructors.
337 It will be executed for sure, but rest of functions called from
338 main are definitely not at startup only. */
67348ccc 339 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
08f835dc
JH
340 d->only_called_at_startup = 0;
341 d->only_called_at_exit &= edge->caller->only_called_at_exit;
342 }
daf5c770
JH
343
344 /* When profile feedback is available, do not try to propagate too hard;
345 counts are already good guide on function frequencies and roundoff
346 errors can make us to push function into unlikely section even when
347 it is executed by the train run. Transfer the function only if all
348 callers are unlikely executed. */
1ede94c5
JH
349 if (profile_info
350 && opt_for_fn (d->function_symbol->decl, flag_branch_probabilities)
351 /* Thunks are not profiled. This is more or less implementation
352 bug. */
353 && !d->function_symbol->thunk.thunk_p
daf5c770
JH
354 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
355 || (edge->caller->global.inlined_to
356 && edge->caller->global.inlined_to->frequency
357 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
358 d->maybe_unlikely_executed = false;
08f835dc
JH
359 if (!edge->frequency)
360 continue;
361 switch (edge->caller->frequency)
362 {
363 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
364 break;
365 case NODE_FREQUENCY_EXECUTED_ONCE:
366 if (dump_file && (dump_flags & TDF_DETAILS))
367 fprintf (dump_file, " Called by %s that is executed once\n",
fec39fa6 368 edge->caller->name ());
08f835dc
JH
369 d->maybe_unlikely_executed = false;
370 if (inline_edge_summary (edge)->loop_depth)
371 {
372 d->maybe_executed_once = false;
373 if (dump_file && (dump_flags & TDF_DETAILS))
374 fprintf (dump_file, " Called in loop\n");
375 }
376 break;
377 case NODE_FREQUENCY_HOT:
378 case NODE_FREQUENCY_NORMAL:
379 if (dump_file && (dump_flags & TDF_DETAILS))
380 fprintf (dump_file, " Called by %s that is normal or hot\n",
fec39fa6 381 edge->caller->name ());
08f835dc
JH
382 d->maybe_unlikely_executed = false;
383 d->maybe_executed_once = false;
384 break;
385 }
386 }
387 return edge != NULL;
388}
389
daf5c770
JH
390/* Return ture if NODE contains hot calls. */
391
392bool
393contains_hot_call_p (struct cgraph_node *node)
394{
395 struct cgraph_edge *e;
396 for (e = node->callees; e; e = e->next_callee)
3dafb85c 397 if (e->maybe_hot_p ())
daf5c770
JH
398 return true;
399 else if (!e->inline_failed
400 && contains_hot_call_p (e->callee))
401 return true;
402 for (e = node->indirect_calls; e; e = e->next_callee)
3dafb85c 403 if (e->maybe_hot_p ())
daf5c770
JH
404 return true;
405 return false;
406}
407
08f835dc
JH
408/* See if the frequency of NODE can be updated based on frequencies of its
409 callers. */
410bool
411ipa_propagate_frequency (struct cgraph_node *node)
412{
1ede94c5 413 struct ipa_propagate_frequency_data d = {node, true, true, true, true};
08f835dc
JH
414 bool changed = false;
415
416 /* We can not propagate anything useful about externally visible functions
417 nor about virtuals. */
418 if (!node->local.local
67348ccc 419 || node->alias
2bf86c84
JH
420 || (opt_for_fn (node->decl, flag_devirtualize)
421 && DECL_VIRTUAL_P (node->decl)))
08f835dc 422 return false;
67348ccc 423 gcc_assert (node->analyzed);
08f835dc 424 if (dump_file && (dump_flags & TDF_DETAILS))
fec39fa6 425 fprintf (dump_file, "Processing frequency %s\n", node->name ());
08f835dc 426
1ede94c5
JH
427 node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
428 true);
08f835dc
JH
429
430 if ((d.only_called_at_startup && !d.only_called_at_exit)
431 && !node->only_called_at_startup)
432 {
433 node->only_called_at_startup = true;
434 if (dump_file)
435 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
fec39fa6 436 node->name ());
08f835dc
JH
437 changed = true;
438 }
439 if ((d.only_called_at_exit && !d.only_called_at_startup)
440 && !node->only_called_at_exit)
441 {
442 node->only_called_at_exit = true;
443 if (dump_file)
444 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
fec39fa6 445 node->name ());
08f835dc
JH
446 changed = true;
447 }
daf5c770
JH
448
449 /* With profile we can decide on hot/normal based on count. */
450 if (node->count)
451 {
452 bool hot = false;
453 if (node->count >= get_hot_bb_threshold ())
454 hot = true;
455 if (!hot)
456 hot |= contains_hot_call_p (node);
457 if (hot)
458 {
459 if (node->frequency != NODE_FREQUENCY_HOT)
460 {
461 if (dump_file)
462 fprintf (dump_file, "Node %s promoted to hot.\n",
fec39fa6 463 node->name ());
daf5c770
JH
464 node->frequency = NODE_FREQUENCY_HOT;
465 return true;
466 }
467 return false;
468 }
469 else if (node->frequency == NODE_FREQUENCY_HOT)
470 {
471 if (dump_file)
472 fprintf (dump_file, "Node %s reduced to normal.\n",
fec39fa6 473 node->name ());
daf5c770
JH
474 node->frequency = NODE_FREQUENCY_NORMAL;
475 changed = true;
476 }
477 }
08f835dc
JH
478 /* These come either from profile or user hints; never update them. */
479 if (node->frequency == NODE_FREQUENCY_HOT
480 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
481 return changed;
482 if (d.maybe_unlikely_executed)
483 {
484 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
485 if (dump_file)
486 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
fec39fa6 487 node->name ());
08f835dc
JH
488 changed = true;
489 }
490 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
491 {
492 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
493 if (dump_file)
494 fprintf (dump_file, "Node %s promoted to executed once.\n",
fec39fa6 495 node->name ());
08f835dc
JH
496 changed = true;
497 }
498 return changed;
499}
500
501/* Simple ipa profile pass propagating frequencies across the callgraph. */
502
503static unsigned int
504ipa_profile (void)
505{
506 struct cgraph_node **order;
507 struct cgraph_edge *e;
508 int order_pos;
509 bool something_changed = false;
510 int i;
511 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
512 struct cgraph_node *n,*n2;
513 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
95d81ba5 514 int nmismatch = 0, nimpossible = 0;
08f835dc
JH
515 bool node_map_initialized = false;
516
517 if (dump_file)
518 dump_histogram (dump_file, histogram);
519 for (i = 0; i < (int)histogram.length (); i++)
520 {
521 overall_time += histogram[i]->count * histogram[i]->time;
522 overall_size += histogram[i]->size;
523 }
524 if (overall_time)
525 {
526 gcov_type threshold;
527
528 gcc_assert (overall_size);
529 if (dump_file)
530 {
531 gcov_type min, cumulated_time = 0, cumulated_size = 0;
532
16998094 533 fprintf (dump_file, "Overall time: %" PRId64"\n",
a9243bfc 534 (int64_t)overall_time);
08f835dc
JH
535 min = get_hot_bb_threshold ();
536 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
537 i++)
538 {
539 cumulated_time += histogram[i]->count * histogram[i]->time;
540 cumulated_size += histogram[i]->size;
541 }
16998094 542 fprintf (dump_file, "GCOV min count: %" PRId64
08f835dc 543 " Time:%3.2f%% Size:%3.2f%%\n",
a9243bfc 544 (int64_t)min,
08f835dc
JH
545 cumulated_time * 100.0 / overall_time,
546 cumulated_size * 100.0 / overall_size);
547 }
548 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
549 threshold = 0;
550 for (i = 0; cumulated < cutoff; i++)
551 {
552 cumulated += histogram[i]->count * histogram[i]->time;
553 threshold = histogram[i]->count;
554 }
555 if (!threshold)
556 threshold = 1;
557 if (dump_file)
558 {
559 gcov_type cumulated_time = 0, cumulated_size = 0;
560
561 for (i = 0;
562 i < (int)histogram.length () && histogram[i]->count >= threshold;
563 i++)
564 {
565 cumulated_time += histogram[i]->count * histogram[i]->time;
566 cumulated_size += histogram[i]->size;
567 }
16998094 568 fprintf (dump_file, "Determined min count: %" PRId64
08f835dc 569 " Time:%3.2f%% Size:%3.2f%%\n",
a9243bfc 570 (int64_t)threshold,
08f835dc
JH
571 cumulated_time * 100.0 / overall_time,
572 cumulated_size * 100.0 / overall_size);
573 }
574 if (threshold > get_hot_bb_threshold ()
575 || in_lto_p)
576 {
577 if (dump_file)
578 fprintf (dump_file, "Threshold updated.\n");
579 set_hot_bb_threshold (threshold);
580 }
581 }
c3284718 582 histogram.release ();
d7809518 583 histogram_pool.release ();
08f835dc
JH
584
585 /* Produce speculative calls: we saved common traget from porfiling into
586 e->common_target_id. Now, at link time, we can look up corresponding
587 function node and produce speculative call. */
588
589 FOR_EACH_DEFINED_FUNCTION (n)
590 {
591 bool update = false;
592
1ede94c5
JH
593 if (!opt_for_fn (n->decl, flag_ipa_profile))
594 continue;
595
08f835dc
JH
596 for (e = n->indirect_calls; e; e = e->next_callee)
597 {
598 if (n->count)
599 nindirect++;
600 if (e->indirect_info->common_target_id)
601 {
602 if (!node_map_initialized)
603 init_node_map (false);
604 node_map_initialized = true;
605 ncommon++;
606 n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
607 if (n2)
608 {
609 if (dump_file)
610 {
611 fprintf (dump_file, "Indirect call -> direct call from"
612 " other module %s/%i => %s/%i, prob %3.2f\n",
2a72a953
DM
613 xstrdup_for_dump (n->name ()), n->order,
614 xstrdup_for_dump (n2->name ()), n2->order,
08f835dc
JH
615 e->indirect_info->common_target_probability
616 / (float)REG_BR_PROB_BASE);
617 }
618 if (e->indirect_info->common_target_probability
619 < REG_BR_PROB_BASE / 2)
620 {
621 nuseless++;
622 if (dump_file)
623 fprintf (dump_file,
624 "Not speculating: probability is too low.\n");
625 }
3dafb85c 626 else if (!e->maybe_hot_p ())
08f835dc
JH
627 {
628 nuseless++;
629 if (dump_file)
630 fprintf (dump_file,
631 "Not speculating: call is cold.\n");
632 }
d52f5295
ML
633 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
634 && n2->can_be_discarded_p ())
08f835dc
JH
635 {
636 nuseless++;
637 if (dump_file)
638 fprintf (dump_file,
639 "Not speculating: target is overwritable "
640 "and can be discarded.\n");
641 }
95d81ba5
JH
642 else if (ipa_node_params_sum && ipa_edge_args_vector
643 && !IPA_NODE_REF (n2)->descriptors.is_empty ()
644 && ipa_get_param_count (IPA_NODE_REF (n2))
645 != ipa_get_cs_argument_count (IPA_EDGE_REF (e))
646 && (ipa_get_param_count (IPA_NODE_REF (n2))
647 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e))
648 || !stdarg_p (TREE_TYPE (n2->decl))))
649 {
650 nmismatch++;
651 if (dump_file)
652 fprintf (dump_file,
653 "Not speculating: "
654 "parameter count mistmatch\n");
655 }
656 else if (e->indirect_info->polymorphic
657 && !opt_for_fn (n->decl, flag_devirtualize)
658 && !possible_polymorphic_call_target_p (e, n2))
659 {
660 nimpossible++;
661 if (dump_file)
662 fprintf (dump_file,
663 "Not speculating: "
664 "function is not in the polymorphic "
665 "call target list\n");
666 }
08f835dc
JH
667 else
668 {
669 /* Target may be overwritable, but profile says that
670 control flow goes to this particular implementation
671 of N2. Speculate on the local alias to allow inlining.
672 */
d52f5295 673 if (!n2->can_be_discarded_p ())
5b79657a
JH
674 {
675 cgraph_node *alias;
d52f5295 676 alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
5b79657a
JH
677 if (alias)
678 n2 = alias;
679 }
08f835dc 680 nconverted++;
3dafb85c
ML
681 e->make_speculative
682 (n2,
08f835dc
JH
683 apply_scale (e->count,
684 e->indirect_info->common_target_probability),
685 apply_scale (e->frequency,
686 e->indirect_info->common_target_probability));
687 update = true;
688 }
689 }
690 else
691 {
692 if (dump_file)
693 fprintf (dump_file, "Function with profile-id %i not found.\n",
694 e->indirect_info->common_target_id);
695 nunknown++;
696 }
697 }
698 }
699 if (update)
700 inline_update_overall_summary (n);
701 }
702 if (node_map_initialized)
703 del_node_map ();
704 if (dump_file && nindirect)
705 fprintf (dump_file,
706 "%i indirect calls trained.\n"
707 "%i (%3.2f%%) have common target.\n"
708 "%i (%3.2f%%) targets was not found.\n"
95d81ba5
JH
709 "%i (%3.2f%%) targets had parameter count mismatch.\n"
710 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
08f835dc
JH
711 "%i (%3.2f%%) speculations seems useless.\n"
712 "%i (%3.2f%%) speculations produced.\n",
713 nindirect,
714 ncommon, ncommon * 100.0 / nindirect,
715 nunknown, nunknown * 100.0 / nindirect,
95d81ba5
JH
716 nmismatch, nmismatch * 100.0 / nindirect,
717 nimpossible, nimpossible * 100.0 / nindirect,
08f835dc
JH
718 nuseless, nuseless * 100.0 / nindirect,
719 nconverted, nconverted * 100.0 / nindirect);
720
3dafb85c 721 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
08f835dc
JH
722 order_pos = ipa_reverse_postorder (order);
723 for (i = order_pos - 1; i >= 0; i--)
724 {
1ede94c5
JH
725 if (order[i]->local.local
726 && opt_for_fn (order[i]->decl, flag_ipa_profile)
727 && ipa_propagate_frequency (order[i]))
08f835dc
JH
728 {
729 for (e = order[i]->callees; e; e = e->next_callee)
67348ccc 730 if (e->callee->local.local && !e->callee->aux)
08f835dc
JH
731 {
732 something_changed = true;
67348ccc 733 e->callee->aux = (void *)1;
08f835dc
JH
734 }
735 }
67348ccc 736 order[i]->aux = NULL;
08f835dc
JH
737 }
738
739 while (something_changed)
740 {
741 something_changed = false;
742 for (i = order_pos - 1; i >= 0; i--)
743 {
1ede94c5
JH
744 if (order[i]->aux
745 && opt_for_fn (order[i]->decl, flag_ipa_profile)
746 && ipa_propagate_frequency (order[i]))
08f835dc
JH
747 {
748 for (e = order[i]->callees; e; e = e->next_callee)
67348ccc 749 if (e->callee->local.local && !e->callee->aux)
08f835dc
JH
750 {
751 something_changed = true;
67348ccc 752 e->callee->aux = (void *)1;
08f835dc
JH
753 }
754 }
67348ccc 755 order[i]->aux = NULL;
08f835dc
JH
756 }
757 }
758 free (order);
759 return 0;
760}
761
08f835dc
JH
762namespace {
763
764const pass_data pass_data_ipa_profile =
765{
766 IPA_PASS, /* type */
767 "profile_estimate", /* name */
768 OPTGROUP_NONE, /* optinfo_flags */
08f835dc
JH
769 TV_IPA_PROFILE, /* tv_id */
770 0, /* properties_required */
771 0, /* properties_provided */
772 0, /* properties_destroyed */
773 0, /* todo_flags_start */
774 0, /* todo_flags_finish */
775};
776
777class pass_ipa_profile : public ipa_opt_pass_d
778{
779public:
c3284718
RS
780 pass_ipa_profile (gcc::context *ctxt)
781 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
782 ipa_profile_generate_summary, /* generate_summary */
783 ipa_profile_write_summary, /* write_summary */
784 ipa_profile_read_summary, /* read_summary */
785 NULL, /* write_optimization_summary */
786 NULL, /* read_optimization_summary */
787 NULL, /* stmt_fixup */
788 0, /* function_transform_todo_flags_start */
789 NULL, /* function_transform */
790 NULL) /* variable_transform */
08f835dc
JH
791 {}
792
793 /* opt_pass methods: */
2bf86c84 794 virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
be55bfe6 795 virtual unsigned int execute (function *) { return ipa_profile (); }
08f835dc
JH
796
797}; // class pass_ipa_profile
798
799} // anon namespace
800
801ipa_opt_pass_d *
802make_pass_ipa_profile (gcc::context *ctxt)
803{
804 return new pass_ipa_profile (ctxt);
805}