]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-profile.c
RISC-V: Handle implied extension for -march parser.
[thirdparty/gcc.git] / gcc / ipa-profile.c
1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* ipa-profile pass implements the following analysis propagating profille
21 inter-procedurally.
22
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
27
28 The information is used to set hot/cold thresholds.
29 - Next speculative indirect call resolution is performed: the local
30 profile pass assigns profile-id to each function and provide us with a
31 histogram specifying the most common target. We look up the callgraph
32 node corresponding to the target and produce a speculative call.
33
34 This call may or may not survive through IPA optimization based on decision
35 of inliner.
36 - Finally we propagate the following flags: unlikely executed, executed
37 once, executed at startup and executed at exit. These flags are used to
38 control code size/performance threshold and code placement (by producing
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
40 #include "config.h"
41 #include "system.h"
42 #include "coretypes.h"
43 #include "backend.h"
44 #include "tree.h"
45 #include "gimple.h"
46 #include "predict.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
49 #include "cgraph.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
53 #include "profile.h"
54 #include "value-prof.h"
55 #include "tree-inline.h"
56 #include "symbol-summary.h"
57 #include "tree-vrp.h"
58 #include "ipa-prop.h"
59 #include "ipa-fnsummary.h"
60
61 /* Entry in the histogram. */
62
63 struct histogram_entry
64 {
65 gcov_type count;
66 int time;
67 int size;
68 };
69
70 /* Histogram of profile values.
71 The histogram is represented as an ordered vector of entries allocated via
72 histogram_pool. During construction a separate hashtable is kept to lookup
73 duplicate entries. */
74
75 vec<histogram_entry *> histogram;
76 static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
77
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
79
80 struct histogram_hash : nofree_ptr_hash <histogram_entry>
81 {
82 static inline hashval_t hash (const histogram_entry *);
83 static inline int equal (const histogram_entry *, const histogram_entry *);
84 };
85
86 inline hashval_t
87 histogram_hash::hash (const histogram_entry *val)
88 {
89 return val->count;
90 }
91
92 inline int
93 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
94 {
95 return val->count == val2->count;
96 }
97
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99 HASHTABLE is the on-side hash kept to avoid duplicates. */
100
101 static void
102 account_time_size (hash_table<histogram_hash> *hashtable,
103 vec<histogram_entry *> &histogram,
104 gcov_type count, int time, int size)
105 {
106 histogram_entry key = {count, 0, 0};
107 histogram_entry **val = hashtable->find_slot (&key, INSERT);
108
109 if (!*val)
110 {
111 *val = histogram_pool.allocate ();
112 **val = key;
113 histogram.safe_push (*val);
114 }
115 (*val)->time += time;
116 (*val)->size += size;
117 }
118
119 int
120 cmp_counts (const void *v1, const void *v2)
121 {
122 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
123 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
124 if (h1->count < h2->count)
125 return 1;
126 if (h1->count > h2->count)
127 return -1;
128 return 0;
129 }
130
131 /* Dump HISTOGRAM to FILE. */
132
133 static void
134 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
135 {
136 unsigned int i;
137 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
138 overall_size = 0;
139
140 fprintf (dump_file, "Histogram:\n");
141 for (i = 0; i < histogram.length (); i++)
142 {
143 overall_time += histogram[i]->count * histogram[i]->time;
144 overall_size += histogram[i]->size;
145 }
146 if (!overall_time)
147 overall_time = 1;
148 if (!overall_size)
149 overall_size = 1;
150 for (i = 0; i < histogram.length (); i++)
151 {
152 cumulated_time += histogram[i]->count * histogram[i]->time;
153 cumulated_size += histogram[i]->size;
154 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
155 (int64_t) histogram[i]->count,
156 histogram[i]->time,
157 cumulated_time * 100.0 / overall_time,
158 histogram[i]->size,
159 cumulated_size * 100.0 / overall_size);
160 }
161 }
162
163 /* Structure containing speculative target information from profile. */
164
165 struct speculative_call_target
166 {
167 speculative_call_target (unsigned int id = 0, int prob = 0)
168 : target_id (id), target_probability (prob)
169 {
170 }
171
172 /* Profile_id of target obtained from profile. */
173 unsigned int target_id;
174 /* Probability that call will land in function with target_id. */
175 unsigned int target_probability;
176 };
177
178 class speculative_call_summary
179 {
180 public:
181 speculative_call_summary () : speculative_call_targets ()
182 {}
183
184 auto_vec<speculative_call_target> speculative_call_targets;
185
186 void dump (FILE *f);
187
188 };
189
190 /* Class to manage call summaries. */
191
192 class ipa_profile_call_summaries
193 : public call_summary<speculative_call_summary *>
194 {
195 public:
196 ipa_profile_call_summaries (symbol_table *table)
197 : call_summary<speculative_call_summary *> (table)
198 {}
199
200 /* Duplicate info when an edge is cloned. */
201 virtual void duplicate (cgraph_edge *, cgraph_edge *,
202 speculative_call_summary *old_sum,
203 speculative_call_summary *new_sum);
204 };
205
206 static ipa_profile_call_summaries *call_sums = NULL;
207
208 /* Dump all information in speculative call summary to F. */
209
210 void
211 speculative_call_summary::dump (FILE *f)
212 {
213 cgraph_node *n2;
214
215 unsigned spec_count = speculative_call_targets.length ();
216 for (unsigned i = 0; i < spec_count; i++)
217 {
218 speculative_call_target item = speculative_call_targets[i];
219 n2 = find_func_by_profile_id (item.target_id);
220 if (n2)
221 fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i,
222 n2->dump_name (),
223 item.target_probability / (float) REG_BR_PROB_BASE);
224 else
225 fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i,
226 item.target_id,
227 item.target_probability / (float) REG_BR_PROB_BASE);
228 }
229 }
230
231 /* Duplicate info when an edge is cloned. */
232
233 void
234 ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
235 speculative_call_summary *old_sum,
236 speculative_call_summary *new_sum)
237 {
238 if (!old_sum)
239 return;
240
241 unsigned old_count = old_sum->speculative_call_targets.length ();
242 if (!old_count)
243 return;
244
245 new_sum->speculative_call_targets.reserve_exact (old_count);
246 new_sum->speculative_call_targets.quick_grow_cleared (old_count);
247
248 for (unsigned i = 0; i < old_count; i++)
249 {
250 new_sum->speculative_call_targets[i]
251 = old_sum->speculative_call_targets[i];
252 }
253 }
254
255 /* Collect histogram and speculative target summaries from CFG profiles. */
256
257 static void
258 ipa_profile_generate_summary (void)
259 {
260 struct cgraph_node *node;
261 gimple_stmt_iterator gsi;
262 basic_block bb;
263
264 hash_table<histogram_hash> hashtable (10);
265
266 gcc_checking_assert (!call_sums);
267 call_sums = new ipa_profile_call_summaries (symtab);
268
269 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
270 if (ENTRY_BLOCK_PTR_FOR_FN
271 (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
272 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
273 {
274 int time = 0;
275 int size = 0;
276 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
277 {
278 gimple *stmt = gsi_stmt (gsi);
279 if (gimple_code (stmt) == GIMPLE_CALL
280 && !gimple_call_fndecl (stmt))
281 {
282 histogram_value h;
283 h = gimple_histogram_value_of_type
284 (DECL_STRUCT_FUNCTION (node->decl),
285 stmt, HIST_TYPE_INDIR_CALL);
286 /* No need to do sanity check: gimple_ic_transform already
287 takes away bad histograms. */
288 if (h)
289 {
290 gcov_type val, count, all;
291 struct cgraph_edge *e = node->get_edge (stmt);
292 if (e && !e->indirect_unknown_callee)
293 continue;
294
295 speculative_call_summary *csum
296 = call_sums->get_create (e);
297
298 for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++)
299 {
300 if (!get_nth_most_common_value (NULL, "indirect call",
301 h, &val, &count, &all,
302 j))
303 continue;
304
305 if (val == 0 || count == 0)
306 continue;
307
308 if (count > all)
309 {
310 if (dump_file)
311 fprintf (dump_file,
312 "Probability capped to 1\n");
313 count = all;
314 }
315 speculative_call_target item (
316 val, GCOV_COMPUTE_SCALE (count, all));
317 csum->speculative_call_targets.safe_push (item);
318 }
319
320 gimple_remove_histogram_value
321 (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
322 }
323 }
324 time += estimate_num_insns (stmt, &eni_time_weights);
325 size += estimate_num_insns (stmt, &eni_size_weights);
326 }
327 if (bb->count.ipa_p () && bb->count.initialized_p ())
328 account_time_size (&hashtable, histogram,
329 bb->count.ipa ().to_gcov_type (),
330 time, size);
331 }
332 histogram.qsort (cmp_counts);
333 }
334
335 /* Serialize the speculative summary info for LTO. */
336
337 static void
338 ipa_profile_write_edge_summary (lto_simple_output_block *ob,
339 speculative_call_summary *csum)
340 {
341 unsigned len = 0;
342
343 len = csum->speculative_call_targets.length ();
344
345 gcc_assert (len <= GCOV_TOPN_VALUES);
346
347 streamer_write_hwi_stream (ob->main_stream, len);
348
349 if (len)
350 {
351 unsigned spec_count = csum->speculative_call_targets.length ();
352 for (unsigned i = 0; i < spec_count; i++)
353 {
354 speculative_call_target item = csum->speculative_call_targets[i];
355 gcc_assert (item.target_id);
356 streamer_write_hwi_stream (ob->main_stream, item.target_id);
357 streamer_write_hwi_stream (ob->main_stream, item.target_probability);
358 }
359 }
360 }
361
362 /* Serialize the ipa info for lto. */
363
364 static void
365 ipa_profile_write_summary (void)
366 {
367 struct lto_simple_output_block *ob
368 = lto_create_simple_output_block (LTO_section_ipa_profile);
369 unsigned int i;
370
371 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
372 for (i = 0; i < histogram.length (); i++)
373 {
374 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
375 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
376 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
377 }
378
379 if (!call_sums)
380 return;
381
382 /* Serialize speculative targets information. */
383 unsigned int count = 0;
384 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
385 lto_symtab_encoder_iterator lsei;
386 cgraph_node *node;
387
388 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
389 lsei_next_function_in_partition (&lsei))
390 {
391 node = lsei_cgraph_node (lsei);
392 if (node->definition && node->has_gimple_body_p ()
393 && node->indirect_calls)
394 count++;
395 }
396
397 streamer_write_uhwi_stream (ob->main_stream, count);
398
399 /* Process all of the functions. */
400 for (lsei = lsei_start_function_in_partition (encoder);
401 !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
402 {
403 cgraph_node *node = lsei_cgraph_node (lsei);
404 if (node->definition && node->has_gimple_body_p ()
405 && node->indirect_calls)
406 {
407 int node_ref = lto_symtab_encoder_encode (encoder, node);
408 streamer_write_uhwi_stream (ob->main_stream, node_ref);
409
410 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
411 {
412 speculative_call_summary *csum = call_sums->get_create (e);
413 ipa_profile_write_edge_summary (ob, csum);
414 }
415 }
416 }
417
418 lto_destroy_simple_output_block (ob);
419 }
420
421 /* Dump all profile summary data for all cgraph nodes and edges to file F. */
422
423 static void
424 ipa_profile_dump_all_summaries (FILE *f)
425 {
426 fprintf (dump_file,
427 "\n========== IPA-profile speculative targets: ==========\n");
428 cgraph_node *node;
429 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
430 {
431 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
432 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
433 {
434 fprintf (f, " Summary for %s of indirect edge %d:\n",
435 e->caller->dump_name (), e->lto_stmt_uid);
436 speculative_call_summary *csum = call_sums->get_create (e);
437 csum->dump (f);
438 }
439 }
440 fprintf (f, "\n\n");
441 }
442
443 /* Read speculative targets information about edge for LTO WPA. */
444
445 static void
446 ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
447 {
448 unsigned i, len;
449
450 len = streamer_read_hwi (ib);
451 gcc_assert (len <= GCOV_TOPN_VALUES);
452
453 speculative_call_summary *csum = call_sums->get_create (edge);
454
455 for (i = 0; i < len; i++)
456 {
457 unsigned int target_id = streamer_read_hwi (ib);
458 int target_probability = streamer_read_hwi (ib);
459 speculative_call_target item (target_id, target_probability);
460 csum->speculative_call_targets.safe_push (item);
461 }
462 }
463
464 /* Read profile speculative targets section information for LTO WPA. */
465
466 static void
467 ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
468 class lto_input_block *ib)
469 {
470 if (!ib)
471 return;
472
473 lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
474
475 unsigned int count = streamer_read_uhwi (ib);
476
477 unsigned int i;
478 unsigned int index;
479 cgraph_node * node;
480
481 for (i = 0; i < count; i++)
482 {
483 index = streamer_read_uhwi (ib);
484 encoder = file_data->symtab_node_encoder;
485 node
486 = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
487
488 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
489 ipa_profile_read_edge_summary (ib, e);
490 }
491 }
492
493 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
494 */
495
496 static void
497 ipa_profile_read_summary (void)
498 {
499 struct lto_file_decl_data ** file_data_vec
500 = lto_get_file_decl_data ();
501 struct lto_file_decl_data * file_data;
502 int j = 0;
503
504 hash_table<histogram_hash> hashtable (10);
505
506 gcc_checking_assert (!call_sums);
507 call_sums = new ipa_profile_call_summaries (symtab);
508
509 while ((file_data = file_data_vec[j++]))
510 {
511 const char *data;
512 size_t len;
513 class lto_input_block *ib
514 = lto_create_simple_input_block (file_data,
515 LTO_section_ipa_profile,
516 &data, &len);
517 if (ib)
518 {
519 unsigned int num = streamer_read_uhwi (ib);
520 unsigned int n;
521 for (n = 0; n < num; n++)
522 {
523 gcov_type count = streamer_read_gcov_count (ib);
524 int time = streamer_read_uhwi (ib);
525 int size = streamer_read_uhwi (ib);
526 account_time_size (&hashtable, histogram,
527 count, time, size);
528 }
529
530 ipa_profile_read_summary_section (file_data, ib);
531
532 lto_destroy_simple_input_block (file_data,
533 LTO_section_ipa_profile,
534 ib, data, len);
535 }
536 }
537 histogram.qsort (cmp_counts);
538 }
539
540 /* Data used by ipa_propagate_frequency. */
541
542 struct ipa_propagate_frequency_data
543 {
544 cgraph_node *function_symbol;
545 bool maybe_unlikely_executed;
546 bool maybe_executed_once;
547 bool only_called_at_startup;
548 bool only_called_at_exit;
549 };
550
551 /* Worker for ipa_propagate_frequency_1. */
552
553 static bool
554 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
555 {
556 struct ipa_propagate_frequency_data *d;
557 struct cgraph_edge *edge;
558
559 d = (struct ipa_propagate_frequency_data *)data;
560 for (edge = node->callers;
561 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
562 || d->only_called_at_startup || d->only_called_at_exit);
563 edge = edge->next_caller)
564 {
565 if (edge->caller != d->function_symbol)
566 {
567 d->only_called_at_startup &= edge->caller->only_called_at_startup;
568 /* It makes sense to put main() together with the static constructors.
569 It will be executed for sure, but rest of functions called from
570 main are definitely not at startup only. */
571 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
572 d->only_called_at_startup = 0;
573 d->only_called_at_exit &= edge->caller->only_called_at_exit;
574 }
575
576 /* When profile feedback is available, do not try to propagate too hard;
577 counts are already good guide on function frequencies and roundoff
578 errors can make us to push function into unlikely section even when
579 it is executed by the train run. Transfer the function only if all
580 callers are unlikely executed. */
581 if (profile_info
582 && !(edge->callee->count.ipa () == profile_count::zero ())
583 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
584 || (edge->caller->inlined_to
585 && edge->caller->inlined_to->frequency
586 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
587 d->maybe_unlikely_executed = false;
588 if (edge->count.ipa ().initialized_p ()
589 && !edge->count.ipa ().nonzero_p ())
590 continue;
591 switch (edge->caller->frequency)
592 {
593 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
594 break;
595 case NODE_FREQUENCY_EXECUTED_ONCE:
596 {
597 if (dump_file && (dump_flags & TDF_DETAILS))
598 fprintf (dump_file, " Called by %s that is executed once\n",
599 edge->caller->dump_name ());
600 d->maybe_unlikely_executed = false;
601 ipa_call_summary *s = ipa_call_summaries->get (edge);
602 if (s != NULL && s->loop_depth)
603 {
604 d->maybe_executed_once = false;
605 if (dump_file && (dump_flags & TDF_DETAILS))
606 fprintf (dump_file, " Called in loop\n");
607 }
608 break;
609 }
610 case NODE_FREQUENCY_HOT:
611 case NODE_FREQUENCY_NORMAL:
612 if (dump_file && (dump_flags & TDF_DETAILS))
613 fprintf (dump_file, " Called by %s that is normal or hot\n",
614 edge->caller->dump_name ());
615 d->maybe_unlikely_executed = false;
616 d->maybe_executed_once = false;
617 break;
618 }
619 }
620 return edge != NULL;
621 }
622
623 /* Return ture if NODE contains hot calls. */
624
625 bool
626 contains_hot_call_p (struct cgraph_node *node)
627 {
628 struct cgraph_edge *e;
629 for (e = node->callees; e; e = e->next_callee)
630 if (e->maybe_hot_p ())
631 return true;
632 else if (!e->inline_failed
633 && contains_hot_call_p (e->callee))
634 return true;
635 for (e = node->indirect_calls; e; e = e->next_callee)
636 if (e->maybe_hot_p ())
637 return true;
638 return false;
639 }
640
641 /* See if the frequency of NODE can be updated based on frequencies of its
642 callers. */
643 bool
644 ipa_propagate_frequency (struct cgraph_node *node)
645 {
646 struct ipa_propagate_frequency_data d = {node, true, true, true, true};
647 bool changed = false;
648
649 /* We cannot propagate anything useful about externally visible functions
650 nor about virtuals. */
651 if (!node->local
652 || node->alias
653 || (opt_for_fn (node->decl, flag_devirtualize)
654 && DECL_VIRTUAL_P (node->decl)))
655 return false;
656 gcc_assert (node->analyzed);
657 if (dump_file && (dump_flags & TDF_DETAILS))
658 fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
659
660 node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
661 true);
662
663 if ((d.only_called_at_startup && !d.only_called_at_exit)
664 && !node->only_called_at_startup)
665 {
666 node->only_called_at_startup = true;
667 if (dump_file)
668 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
669 node->dump_name ());
670 changed = true;
671 }
672 if ((d.only_called_at_exit && !d.only_called_at_startup)
673 && !node->only_called_at_exit)
674 {
675 node->only_called_at_exit = true;
676 if (dump_file)
677 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
678 node->dump_name ());
679 changed = true;
680 }
681
682 /* With profile we can decide on hot/normal based on count. */
683 if (node->count. ipa().initialized_p ())
684 {
685 bool hot = false;
686 if (!(node->count. ipa() == profile_count::zero ())
687 && node->count. ipa() >= get_hot_bb_threshold ())
688 hot = true;
689 if (!hot)
690 hot |= contains_hot_call_p (node);
691 if (hot)
692 {
693 if (node->frequency != NODE_FREQUENCY_HOT)
694 {
695 if (dump_file)
696 fprintf (dump_file, "Node %s promoted to hot.\n",
697 node->dump_name ());
698 node->frequency = NODE_FREQUENCY_HOT;
699 return true;
700 }
701 return false;
702 }
703 else if (node->frequency == NODE_FREQUENCY_HOT)
704 {
705 if (dump_file)
706 fprintf (dump_file, "Node %s reduced to normal.\n",
707 node->dump_name ());
708 node->frequency = NODE_FREQUENCY_NORMAL;
709 changed = true;
710 }
711 }
712 /* These come either from profile or user hints; never update them. */
713 if (node->frequency == NODE_FREQUENCY_HOT
714 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
715 return changed;
716 if (d.maybe_unlikely_executed)
717 {
718 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
719 if (dump_file)
720 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
721 node->dump_name ());
722 changed = true;
723 }
724 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
725 {
726 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
727 if (dump_file)
728 fprintf (dump_file, "Node %s promoted to executed once.\n",
729 node->dump_name ());
730 changed = true;
731 }
732 return changed;
733 }
734
735 /* Check that number of arguments of N agrees with E.
736 Be conservative when summaries are not present. */
737
738 static bool
739 check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
740 {
741 if (!ipa_node_params_sum || !ipa_edge_args_sum)
742 return true;
743 class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ());
744 if (!info)
745 return true;
746 ipa_edge_args *e_info = IPA_EDGE_REF (e);
747 if (!e_info)
748 return true;
749 if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
750 && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
751 || !stdarg_p (TREE_TYPE (n->decl))))
752 return false;
753 return true;
754 }
755
756 /* Simple ipa profile pass propagating frequencies across the callgraph. */
757
758 static unsigned int
759 ipa_profile (void)
760 {
761 struct cgraph_node **order;
762 struct cgraph_edge *e;
763 int order_pos;
764 bool something_changed = false;
765 int i;
766 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
767 struct cgraph_node *n,*n2;
768 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
769 int nmismatch = 0, nimpossible = 0;
770 bool node_map_initialized = false;
771 gcov_type threshold;
772
773 if (dump_file)
774 dump_histogram (dump_file, histogram);
775 for (i = 0; i < (int)histogram.length (); i++)
776 {
777 overall_time += histogram[i]->count * histogram[i]->time;
778 overall_size += histogram[i]->size;
779 }
780 threshold = 0;
781 if (overall_time)
782 {
783 gcc_assert (overall_size);
784
785 cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
786 for (i = 0; cumulated < cutoff; i++)
787 {
788 cumulated += histogram[i]->count * histogram[i]->time;
789 threshold = histogram[i]->count;
790 }
791 if (!threshold)
792 threshold = 1;
793 if (dump_file)
794 {
795 gcov_type cumulated_time = 0, cumulated_size = 0;
796
797 for (i = 0;
798 i < (int)histogram.length () && histogram[i]->count >= threshold;
799 i++)
800 {
801 cumulated_time += histogram[i]->count * histogram[i]->time;
802 cumulated_size += histogram[i]->size;
803 }
804 fprintf (dump_file, "Determined min count: %" PRId64
805 " Time:%3.2f%% Size:%3.2f%%\n",
806 (int64_t)threshold,
807 cumulated_time * 100.0 / overall_time,
808 cumulated_size * 100.0 / overall_size);
809 }
810
811 if (in_lto_p)
812 {
813 if (dump_file)
814 fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
815 set_hot_bb_threshold (threshold);
816 }
817 }
818 histogram.release ();
819 histogram_pool.release ();
820
821 /* Produce speculative calls: we saved common target from profiling into
822 e->target_id. Now, at link time, we can look up corresponding
823 function node and produce speculative call. */
824
825 gcc_checking_assert (call_sums);
826
827 if (dump_file)
828 {
829 if (!node_map_initialized)
830 init_node_map (false);
831 node_map_initialized = true;
832
833 ipa_profile_dump_all_summaries (dump_file);
834 }
835
836 FOR_EACH_DEFINED_FUNCTION (n)
837 {
838 bool update = false;
839
840 if (!opt_for_fn (n->decl, flag_ipa_profile))
841 continue;
842
843 for (e = n->indirect_calls; e; e = e->next_callee)
844 {
845 if (n->count.initialized_p ())
846 nindirect++;
847
848 speculative_call_summary *csum = call_sums->get_create (e);
849 unsigned spec_count = csum->speculative_call_targets.length ();
850 if (spec_count)
851 {
852 if (!node_map_initialized)
853 init_node_map (false);
854 node_map_initialized = true;
855 ncommon++;
856
857 if (in_lto_p)
858 {
859 if (dump_file)
860 {
861 fprintf (dump_file,
862 "Updating hotness threshold in LTO mode.\n");
863 fprintf (dump_file, "Updated min count: %" PRId64 "\n",
864 (int64_t) threshold / spec_count);
865 }
866 set_hot_bb_threshold (threshold / spec_count);
867 }
868
869 unsigned speculative_id = 0;
870 profile_count orig = e->count;
871 for (unsigned i = 0; i < spec_count; i++)
872 {
873 speculative_call_target item
874 = csum->speculative_call_targets[i];
875 n2 = find_func_by_profile_id (item.target_id);
876 if (n2)
877 {
878 if (dump_file)
879 {
880 fprintf (dump_file,
881 "Indirect call -> direct call from"
882 " other module %s => %s, prob %3.2f\n",
883 n->dump_name (),
884 n2->dump_name (),
885 item.target_probability
886 / (float) REG_BR_PROB_BASE);
887 }
888 if (item.target_probability
889 < REG_BR_PROB_BASE / GCOV_TOPN_VALUES / 2)
890 {
891 nuseless++;
892 if (dump_file)
893 fprintf (dump_file,
894 "Not speculating: "
895 "probability is too low.\n");
896 }
897 else if (!e->maybe_hot_p ())
898 {
899 nuseless++;
900 if (dump_file)
901 fprintf (dump_file,
902 "Not speculating: call is cold.\n");
903 }
904 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
905 && n2->can_be_discarded_p ())
906 {
907 nuseless++;
908 if (dump_file)
909 fprintf (dump_file,
910 "Not speculating: target is overwritable "
911 "and can be discarded.\n");
912 }
913 else if (!check_argument_count (n2, e))
914 {
915 nmismatch++;
916 if (dump_file)
917 fprintf (dump_file,
918 "Not speculating: "
919 "parameter count mismatch\n");
920 }
921 else if (e->indirect_info->polymorphic
922 && !opt_for_fn (n->decl, flag_devirtualize)
923 && !possible_polymorphic_call_target_p (e, n2))
924 {
925 nimpossible++;
926 if (dump_file)
927 fprintf (dump_file,
928 "Not speculating: "
929 "function is not in the polymorphic "
930 "call target list\n");
931 }
932 else
933 {
934 /* Target may be overwritable, but profile says that
935 control flow goes to this particular implementation
936 of N2. Speculate on the local alias to allow
937 inlining. */
938 if (!n2->can_be_discarded_p ())
939 {
940 cgraph_node *alias;
941 alias = dyn_cast<cgraph_node *>
942 (n2->noninterposable_alias ());
943 if (alias)
944 n2 = alias;
945 }
946 nconverted++;
947 profile_probability prob
948 = profile_probability::from_reg_br_prob_base
949 (item.target_probability).adjusted ();
950 e->make_speculative (n2,
951 orig.apply_probability (prob),
952 speculative_id);
953 update = true;
954 speculative_id++;
955 }
956 }
957 else
958 {
959 if (dump_file)
960 fprintf (dump_file,
961 "Function with profile-id %i not found.\n",
962 item.target_id);
963 nunknown++;
964 }
965 }
966 }
967 }
968 if (update)
969 ipa_update_overall_fn_summary (n);
970 }
971 if (node_map_initialized)
972 del_node_map ();
973 if (dump_file && nindirect)
974 fprintf (dump_file,
975 "%i indirect calls trained.\n"
976 "%i (%3.2f%%) have common target.\n"
977 "%i (%3.2f%%) targets was not found.\n"
978 "%i (%3.2f%%) targets had parameter count mismatch.\n"
979 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
980 "%i (%3.2f%%) speculations seems useless.\n"
981 "%i (%3.2f%%) speculations produced.\n",
982 nindirect,
983 ncommon, ncommon * 100.0 / nindirect,
984 nunknown, nunknown * 100.0 / nindirect,
985 nmismatch, nmismatch * 100.0 / nindirect,
986 nimpossible, nimpossible * 100.0 / nindirect,
987 nuseless, nuseless * 100.0 / nindirect,
988 nconverted, nconverted * 100.0 / nindirect);
989
990 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
991 order_pos = ipa_reverse_postorder (order);
992 for (i = order_pos - 1; i >= 0; i--)
993 {
994 if (order[i]->local
995 && opt_for_fn (order[i]->decl, flag_ipa_profile)
996 && ipa_propagate_frequency (order[i]))
997 {
998 for (e = order[i]->callees; e; e = e->next_callee)
999 if (e->callee->local && !e->callee->aux)
1000 {
1001 something_changed = true;
1002 e->callee->aux = (void *)1;
1003 }
1004 }
1005 order[i]->aux = NULL;
1006 }
1007
1008 while (something_changed)
1009 {
1010 something_changed = false;
1011 for (i = order_pos - 1; i >= 0; i--)
1012 {
1013 if (order[i]->aux
1014 && opt_for_fn (order[i]->decl, flag_ipa_profile)
1015 && ipa_propagate_frequency (order[i]))
1016 {
1017 for (e = order[i]->callees; e; e = e->next_callee)
1018 if (e->callee->local && !e->callee->aux)
1019 {
1020 something_changed = true;
1021 e->callee->aux = (void *)1;
1022 }
1023 }
1024 order[i]->aux = NULL;
1025 }
1026 }
1027 free (order);
1028
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 symtab->dump (dump_file);
1031
1032 delete call_sums;
1033 call_sums = NULL;
1034
1035 return 0;
1036 }
1037
1038 namespace {
1039
1040 const pass_data pass_data_ipa_profile =
1041 {
1042 IPA_PASS, /* type */
1043 "profile_estimate", /* name */
1044 OPTGROUP_NONE, /* optinfo_flags */
1045 TV_IPA_PROFILE, /* tv_id */
1046 0, /* properties_required */
1047 0, /* properties_provided */
1048 0, /* properties_destroyed */
1049 0, /* todo_flags_start */
1050 0, /* todo_flags_finish */
1051 };
1052
1053 class pass_ipa_profile : public ipa_opt_pass_d
1054 {
1055 public:
1056 pass_ipa_profile (gcc::context *ctxt)
1057 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1058 ipa_profile_generate_summary, /* generate_summary */
1059 ipa_profile_write_summary, /* write_summary */
1060 ipa_profile_read_summary, /* read_summary */
1061 NULL, /* write_optimization_summary */
1062 NULL, /* read_optimization_summary */
1063 NULL, /* stmt_fixup */
1064 0, /* function_transform_todo_flags_start */
1065 NULL, /* function_transform */
1066 NULL) /* variable_transform */
1067 {}
1068
1069 /* opt_pass methods: */
1070 virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
1071 virtual unsigned int execute (function *) { return ipa_profile (); }
1072
1073 }; // class pass_ipa_profile
1074
1075 } // anon namespace
1076
1077 ipa_opt_pass_d *
1078 make_pass_ipa_profile (gcc::context *ctxt)
1079 {
1080 return new pass_ipa_profile (ctxt);
1081 }