]>
Commit | Line | Data |
---|---|---|
08f835dc JH |
1 | /* Basic IPA optimizations based on profile. |
2 | Copyright (C) 2003-2013 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
daf5c770 JH |
20 | /* ipa-profile pass implements the following analysis propagating profile |
21 | inter-procedurally. | |
22 | ||
23 | - Count histogram construction. This is a histogram analyzing how much | |
24 | time is spent executing statements with a given execution count read | |
25 | from profile feedback. This histogram is complete only with LTO, | |
26 | otherwise it contains information only about the current unit. | |
27 | ||
28 | Similar histogram is also estimated by coverage runtime. This histogram | |
29 | is not dependent on LTO, but it suffers from various defects; first | |
30 | gcov runtime is not weighting individual basic block by estimated execution | |
31 | time and second the merging of multiple runs makes assumption that the | |
32 | histogram distribution did not change. Consequently histogram constructed | |
33 | here may be more precise. | |
34 | ||
35 | The information is used to set hot/cold thresholds. | |
36 | - Next speculative indirect call resolution is performed: the local | |
37 | profile pass assigns profile-id to each function and provides us with a | |
38 | histogram specifying the most common target. We look up the callgraph | |
39 | node corresponding to the target and produce a speculative call. | |
40 | ||
41 | This call may or may not survive through IPA optimization based on decision | |
42 | of inliner. | |
43 | - Finally we propagate the following flags: unlikely executed, executed | |
44 | once, executed at startup and executed at exit. These flags are used to | |
45 | control code size/performance threshold and code placement (by producing | |
46 | .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */ | |
08f835dc JH |
47 | #include "config.h" |
48 | #include "system.h" | |
49 | #include "coretypes.h" | |
50 | #include "tm.h" | |
4d648807 | 51 | #include "tree.h" |
08f835dc JH |
52 | #include "cgraph.h" |
53 | #include "tree-pass.h" | |
2fb9a547 AM |
54 | #include "tree-ssa-alias.h" |
55 | #include "internal-fn.h" | |
56 | #include "gimple-expr.h" | |
08f835dc | 57 | #include "gimple.h" |
5be5c238 | 58 | #include "gimple-iterator.h" |
08f835dc JH |
59 | #include "flags.h" |
60 | #include "target.h" | |
61 | #include "tree-iterator.h" | |
62 | #include "ipa-utils.h" | |
08f835dc JH |
63 | #include "profile.h" |
64 | #include "params.h" | |
65 | #include "value-prof.h" | |
66 | #include "alloc-pool.h" | |
67 | #include "tree-inline.h" | |
68 | #include "lto-streamer.h" | |
69 | #include "data-streamer.h" | |
70 | #include "ipa-inline.h" | |
71 | ||
72 | /* Entry in the histogram. */ | |
73 | ||
74 | struct histogram_entry | |
75 | { | |
76 | gcov_type count; | |
77 | int time; | |
78 | int size; | |
79 | }; | |
80 | ||
81 | /* Histogram of profile values. | |
82 | The histogram is represented as an ordered vector of entries allocated via | |
83 | histogram_pool. During construction a separate hashtable is kept to lookup | |
84 | duplicate entries. */ | |
85 | ||
86 | vec<histogram_entry *> histogram; | |
87 | static alloc_pool histogram_pool; | |
88 | ||
89 | /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ | |
90 | ||
91 | struct histogram_hash : typed_noop_remove <histogram_entry> | |
92 | { | |
93 | typedef histogram_entry value_type; | |
94 | typedef histogram_entry compare_type; | |
95 | static inline hashval_t hash (const value_type *); | |
96 | static inline int equal (const value_type *, const compare_type *); | |
97 | }; | |
98 | ||
99 | inline hashval_t | |
100 | histogram_hash::hash (const histogram_entry *val) | |
101 | { | |
102 | return val->count; | |
103 | } | |
104 | ||
105 | inline int | |
106 | histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) | |
107 | { | |
108 | return val->count == val2->count; | |
109 | } | |
110 | ||
111 | /* Account TIME and SIZE executed COUNT times into HISTOGRAM. | |
112 | HASHTABLE is the on-side hash kept to avoid duplicates. */ | |
113 | ||
114 | static void | |
115 | account_time_size (hash_table <histogram_hash> hashtable, | |
116 | vec<histogram_entry *> &histogram, | |
117 | gcov_type count, int time, int size) | |
118 | { | |
119 | histogram_entry key = {count, 0, 0}; | |
120 | histogram_entry **val = hashtable.find_slot (&key, INSERT); | |
121 | ||
122 | if (!*val) | |
123 | { | |
124 | *val = (histogram_entry *) pool_alloc (histogram_pool); | |
125 | **val = key; | |
126 | histogram.safe_push (*val); | |
127 | } | |
128 | (*val)->time += time; | |
129 | (*val)->size += size; | |
130 | } | |
131 | ||
132 | int | |
133 | cmp_counts (const void *v1, const void *v2) | |
134 | { | |
135 | const histogram_entry *h1 = *(const histogram_entry * const *)v1; | |
136 | const histogram_entry *h2 = *(const histogram_entry * const *)v2; | |
137 | if (h1->count < h2->count) | |
138 | return 1; | |
139 | if (h1->count > h2->count) | |
140 | return -1; | |
141 | return 0; | |
142 | } | |
143 | ||
144 | /* Dump HISTOGRAM to FILE. */ | |
145 | ||
146 | static void | |
147 | dump_histogram (FILE *file, vec<histogram_entry *> histogram) | |
148 | { | |
149 | unsigned int i; | |
150 | gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0; | |
151 | ||
152 | fprintf (dump_file, "Histogram:\n"); | |
153 | for (i = 0; i < histogram.length (); i++) | |
154 | { | |
155 | overall_time += histogram[i]->count * histogram[i]->time; | |
156 | overall_size += histogram[i]->size; | |
157 | } | |
158 | if (!overall_time) | |
159 | overall_time = 1; | |
160 | if (!overall_size) | |
161 | overall_size = 1; | |
162 | for (i = 0; i < histogram.length (); i++) | |
163 | { | |
164 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
165 | cumulated_size += histogram[i]->size; | |
166 | fprintf (file, " "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n", | |
167 | (HOST_WIDEST_INT) histogram[i]->count, | |
168 | histogram[i]->time, | |
169 | cumulated_time * 100.0 / overall_time, | |
170 | histogram[i]->size, | |
171 | cumulated_size * 100.0 / overall_size); | |
172 | } | |
173 | } | |
174 | ||
175 | /* Collect histogram from CFG profiles. */ | |
176 | ||
177 | static void | |
178 | ipa_profile_generate_summary (void) | |
179 | { | |
180 | struct cgraph_node *node; | |
181 | gimple_stmt_iterator gsi; | |
182 | hash_table <histogram_hash> hashtable; | |
183 | basic_block bb; | |
184 | ||
185 | hashtable.create (10); | |
186 | histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry), | |
187 | 10); | |
188 | ||
189 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) | |
67348ccc | 190 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
08f835dc JH |
191 | { |
192 | int time = 0; | |
193 | int size = 0; | |
194 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
195 | { | |
196 | gimple stmt = gsi_stmt (gsi); | |
197 | if (gimple_code (stmt) == GIMPLE_CALL | |
198 | && !gimple_call_fndecl (stmt)) | |
199 | { | |
200 | histogram_value h; | |
201 | h = gimple_histogram_value_of_type | |
67348ccc | 202 | (DECL_STRUCT_FUNCTION (node->decl), |
08f835dc JH |
203 | stmt, HIST_TYPE_INDIR_CALL); |
204 | /* No need to do sanity check: gimple_ic_transform already | |
205 | takes away bad histograms. */ | |
206 | if (h) | |
207 | { | |
208 | /* counter 0 is target, counter 1 is number of execution we called target, | |
209 | counter 2 is total number of executions. */ | |
210 | if (h->hvalue.counters[2]) | |
211 | { | |
212 | struct cgraph_edge * e = cgraph_edge (node, stmt); | |
213 | e->indirect_info->common_target_id | |
214 | = h->hvalue.counters [0]; | |
215 | e->indirect_info->common_target_probability | |
216 | = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); | |
217 | if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) | |
218 | { | |
219 | if (dump_file) | |
220 | fprintf (dump_file, "Probability capped to 1\n"); | |
221 | e->indirect_info->common_target_probability = REG_BR_PROB_BASE; | |
222 | } | |
223 | } | |
67348ccc | 224 | gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), |
08f835dc JH |
225 | stmt, h); |
226 | } | |
227 | } | |
228 | time += estimate_num_insns (stmt, &eni_time_weights); | |
229 | size += estimate_num_insns (stmt, &eni_size_weights); | |
230 | } | |
231 | account_time_size (hashtable, histogram, bb->count, time, size); | |
232 | } | |
233 | hashtable.dispose (); | |
234 | histogram.qsort (cmp_counts); | |
235 | } | |
236 | ||
237 | /* Serialize the ipa info for lto. */ | |
238 | ||
239 | static void | |
240 | ipa_profile_write_summary (void) | |
241 | { | |
242 | struct lto_simple_output_block *ob | |
243 | = lto_create_simple_output_block (LTO_section_ipa_profile); | |
244 | unsigned int i; | |
245 | ||
c3284718 | 246 | streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); |
08f835dc JH |
247 | for (i = 0; i < histogram.length (); i++) |
248 | { | |
249 | streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); | |
250 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); | |
251 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); | |
252 | } | |
253 | lto_destroy_simple_output_block (ob); | |
254 | } | |
255 | ||
256 | /* Deserialize the ipa info for lto. */ | |
257 | ||
258 | static void | |
259 | ipa_profile_read_summary (void) | |
260 | { | |
261 | struct lto_file_decl_data ** file_data_vec | |
262 | = lto_get_file_decl_data (); | |
263 | struct lto_file_decl_data * file_data; | |
264 | hash_table <histogram_hash> hashtable; | |
265 | int j = 0; | |
266 | ||
267 | hashtable.create (10); | |
268 | histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry), | |
269 | 10); | |
270 | ||
271 | while ((file_data = file_data_vec[j++])) | |
272 | { | |
273 | const char *data; | |
274 | size_t len; | |
275 | struct lto_input_block *ib | |
276 | = lto_create_simple_input_block (file_data, | |
277 | LTO_section_ipa_profile, | |
278 | &data, &len); | |
279 | if (ib) | |
280 | { | |
281 | unsigned int num = streamer_read_uhwi (ib); | |
282 | unsigned int n; | |
283 | for (n = 0; n < num; n++) | |
284 | { | |
285 | gcov_type count = streamer_read_gcov_count (ib); | |
286 | int time = streamer_read_uhwi (ib); | |
287 | int size = streamer_read_uhwi (ib); | |
288 | account_time_size (hashtable, histogram, | |
289 | count, time, size); | |
290 | } | |
291 | lto_destroy_simple_input_block (file_data, | |
292 | LTO_section_ipa_profile, | |
293 | ib, data, len); | |
294 | } | |
295 | } | |
296 | hashtable.dispose (); | |
297 | histogram.qsort (cmp_counts); | |
298 | } | |
299 | ||
/* Data used by ipa_propagate_frequency.  All flags start out true and are
   cleared by ipa_propagate_frequency_1 as callers contradicting them are
   found.  */

struct ipa_propagate_frequency_data
{
  /* No caller seen so far rules out that the function is unlikely
     executed.  */
  bool maybe_unlikely_executed;
  /* No caller seen so far rules out that the function is executed once.  */
  bool maybe_executed_once;
  /* Every caller seen so far is itself only called at startup.  */
  bool only_called_at_startup;
  /* Every caller seen so far is itself only called at exit.  */
  bool only_called_at_exit;
};
309 | ||
310 | /* Worker for ipa_propagate_frequency_1. */ | |
311 | ||
312 | static bool | |
313 | ipa_propagate_frequency_1 (struct cgraph_node *node, void *data) | |
314 | { | |
315 | struct ipa_propagate_frequency_data *d; | |
316 | struct cgraph_edge *edge; | |
317 | ||
318 | d = (struct ipa_propagate_frequency_data *)data; | |
319 | for (edge = node->callers; | |
320 | edge && (d->maybe_unlikely_executed || d->maybe_executed_once | |
321 | || d->only_called_at_startup || d->only_called_at_exit); | |
322 | edge = edge->next_caller) | |
323 | { | |
324 | if (edge->caller != node) | |
325 | { | |
326 | d->only_called_at_startup &= edge->caller->only_called_at_startup; | |
327 | /* It makes sense to put main() together with the static constructors. | |
328 | It will be executed for sure, but rest of functions called from | |
329 | main are definitely not at startup only. */ | |
67348ccc | 330 | if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) |
08f835dc JH |
331 | d->only_called_at_startup = 0; |
332 | d->only_called_at_exit &= edge->caller->only_called_at_exit; | |
333 | } | |
daf5c770 JH |
334 | |
335 | /* When profile feedback is available, do not try to propagate too hard; | |
336 | counts are already good guide on function frequencies and roundoff | |
337 | errors can make us to push function into unlikely section even when | |
338 | it is executed by the train run. Transfer the function only if all | |
339 | callers are unlikely executed. */ | |
340 | if (profile_info && flag_branch_probabilities | |
341 | && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED | |
342 | || (edge->caller->global.inlined_to | |
343 | && edge->caller->global.inlined_to->frequency | |
344 | != NODE_FREQUENCY_UNLIKELY_EXECUTED))) | |
345 | d->maybe_unlikely_executed = false; | |
08f835dc JH |
346 | if (!edge->frequency) |
347 | continue; | |
348 | switch (edge->caller->frequency) | |
349 | { | |
350 | case NODE_FREQUENCY_UNLIKELY_EXECUTED: | |
351 | break; | |
352 | case NODE_FREQUENCY_EXECUTED_ONCE: | |
353 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
354 | fprintf (dump_file, " Called by %s that is executed once\n", | |
fec39fa6 | 355 | edge->caller->name ()); |
08f835dc JH |
356 | d->maybe_unlikely_executed = false; |
357 | if (inline_edge_summary (edge)->loop_depth) | |
358 | { | |
359 | d->maybe_executed_once = false; | |
360 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
361 | fprintf (dump_file, " Called in loop\n"); | |
362 | } | |
363 | break; | |
364 | case NODE_FREQUENCY_HOT: | |
365 | case NODE_FREQUENCY_NORMAL: | |
366 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
367 | fprintf (dump_file, " Called by %s that is normal or hot\n", | |
fec39fa6 | 368 | edge->caller->name ()); |
08f835dc JH |
369 | d->maybe_unlikely_executed = false; |
370 | d->maybe_executed_once = false; | |
371 | break; | |
372 | } | |
373 | } | |
374 | return edge != NULL; | |
375 | } | |
376 | ||
daf5c770 JH |
377 | /* Return ture if NODE contains hot calls. */ |
378 | ||
379 | bool | |
380 | contains_hot_call_p (struct cgraph_node *node) | |
381 | { | |
382 | struct cgraph_edge *e; | |
383 | for (e = node->callees; e; e = e->next_callee) | |
384 | if (cgraph_maybe_hot_edge_p (e)) | |
385 | return true; | |
386 | else if (!e->inline_failed | |
387 | && contains_hot_call_p (e->callee)) | |
388 | return true; | |
389 | for (e = node->indirect_calls; e; e = e->next_callee) | |
390 | if (cgraph_maybe_hot_edge_p (e)) | |
391 | return true; | |
392 | return false; | |
393 | } | |
394 | ||
08f835dc JH |
395 | /* See if the frequency of NODE can be updated based on frequencies of its |
396 | callers. */ | |
397 | bool | |
398 | ipa_propagate_frequency (struct cgraph_node *node) | |
399 | { | |
400 | struct ipa_propagate_frequency_data d = {true, true, true, true}; | |
401 | bool changed = false; | |
402 | ||
403 | /* We can not propagate anything useful about externally visible functions | |
404 | nor about virtuals. */ | |
405 | if (!node->local.local | |
67348ccc DM |
406 | || node->alias |
407 | || (flag_devirtualize && DECL_VIRTUAL_P (node->decl))) | |
08f835dc | 408 | return false; |
67348ccc | 409 | gcc_assert (node->analyzed); |
08f835dc | 410 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fec39fa6 | 411 | fprintf (dump_file, "Processing frequency %s\n", node->name ()); |
08f835dc JH |
412 | |
413 | cgraph_for_node_and_aliases (node, ipa_propagate_frequency_1, &d, true); | |
414 | ||
415 | if ((d.only_called_at_startup && !d.only_called_at_exit) | |
416 | && !node->only_called_at_startup) | |
417 | { | |
418 | node->only_called_at_startup = true; | |
419 | if (dump_file) | |
420 | fprintf (dump_file, "Node %s promoted to only called at startup.\n", | |
fec39fa6 | 421 | node->name ()); |
08f835dc JH |
422 | changed = true; |
423 | } | |
424 | if ((d.only_called_at_exit && !d.only_called_at_startup) | |
425 | && !node->only_called_at_exit) | |
426 | { | |
427 | node->only_called_at_exit = true; | |
428 | if (dump_file) | |
429 | fprintf (dump_file, "Node %s promoted to only called at exit.\n", | |
fec39fa6 | 430 | node->name ()); |
08f835dc JH |
431 | changed = true; |
432 | } | |
daf5c770 JH |
433 | |
434 | /* With profile we can decide on hot/normal based on count. */ | |
435 | if (node->count) | |
436 | { | |
437 | bool hot = false; | |
438 | if (node->count >= get_hot_bb_threshold ()) | |
439 | hot = true; | |
440 | if (!hot) | |
441 | hot |= contains_hot_call_p (node); | |
442 | if (hot) | |
443 | { | |
444 | if (node->frequency != NODE_FREQUENCY_HOT) | |
445 | { | |
446 | if (dump_file) | |
447 | fprintf (dump_file, "Node %s promoted to hot.\n", | |
fec39fa6 | 448 | node->name ()); |
daf5c770 JH |
449 | node->frequency = NODE_FREQUENCY_HOT; |
450 | return true; | |
451 | } | |
452 | return false; | |
453 | } | |
454 | else if (node->frequency == NODE_FREQUENCY_HOT) | |
455 | { | |
456 | if (dump_file) | |
457 | fprintf (dump_file, "Node %s reduced to normal.\n", | |
fec39fa6 | 458 | node->name ()); |
daf5c770 JH |
459 | node->frequency = NODE_FREQUENCY_NORMAL; |
460 | changed = true; | |
461 | } | |
462 | } | |
08f835dc JH |
463 | /* These come either from profile or user hints; never update them. */ |
464 | if (node->frequency == NODE_FREQUENCY_HOT | |
465 | || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
466 | return changed; | |
467 | if (d.maybe_unlikely_executed) | |
468 | { | |
469 | node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
470 | if (dump_file) | |
471 | fprintf (dump_file, "Node %s promoted to unlikely executed.\n", | |
fec39fa6 | 472 | node->name ()); |
08f835dc JH |
473 | changed = true; |
474 | } | |
475 | else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) | |
476 | { | |
477 | node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | |
478 | if (dump_file) | |
479 | fprintf (dump_file, "Node %s promoted to executed once.\n", | |
fec39fa6 | 480 | node->name ()); |
08f835dc JH |
481 | changed = true; |
482 | } | |
483 | return changed; | |
484 | } | |
485 | ||
486 | /* Simple ipa profile pass propagating frequencies across the callgraph. */ | |
487 | ||
488 | static unsigned int | |
489 | ipa_profile (void) | |
490 | { | |
491 | struct cgraph_node **order; | |
492 | struct cgraph_edge *e; | |
493 | int order_pos; | |
494 | bool something_changed = false; | |
495 | int i; | |
496 | gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; | |
497 | struct cgraph_node *n,*n2; | |
498 | int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0; | |
499 | bool node_map_initialized = false; | |
500 | ||
501 | if (dump_file) | |
502 | dump_histogram (dump_file, histogram); | |
503 | for (i = 0; i < (int)histogram.length (); i++) | |
504 | { | |
505 | overall_time += histogram[i]->count * histogram[i]->time; | |
506 | overall_size += histogram[i]->size; | |
507 | } | |
508 | if (overall_time) | |
509 | { | |
510 | gcov_type threshold; | |
511 | ||
512 | gcc_assert (overall_size); | |
513 | if (dump_file) | |
514 | { | |
515 | gcov_type min, cumulated_time = 0, cumulated_size = 0; | |
516 | ||
517 | fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n", | |
518 | (HOST_WIDEST_INT)overall_time); | |
519 | min = get_hot_bb_threshold (); | |
520 | for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min; | |
521 | i++) | |
522 | { | |
523 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
524 | cumulated_size += histogram[i]->size; | |
525 | } | |
526 | fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC | |
527 | " Time:%3.2f%% Size:%3.2f%%\n", | |
528 | (HOST_WIDEST_INT)min, | |
529 | cumulated_time * 100.0 / overall_time, | |
530 | cumulated_size * 100.0 / overall_size); | |
531 | } | |
532 | cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; | |
533 | threshold = 0; | |
534 | for (i = 0; cumulated < cutoff; i++) | |
535 | { | |
536 | cumulated += histogram[i]->count * histogram[i]->time; | |
537 | threshold = histogram[i]->count; | |
538 | } | |
539 | if (!threshold) | |
540 | threshold = 1; | |
541 | if (dump_file) | |
542 | { | |
543 | gcov_type cumulated_time = 0, cumulated_size = 0; | |
544 | ||
545 | for (i = 0; | |
546 | i < (int)histogram.length () && histogram[i]->count >= threshold; | |
547 | i++) | |
548 | { | |
549 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
550 | cumulated_size += histogram[i]->size; | |
551 | } | |
552 | fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC | |
553 | " Time:%3.2f%% Size:%3.2f%%\n", | |
554 | (HOST_WIDEST_INT)threshold, | |
555 | cumulated_time * 100.0 / overall_time, | |
556 | cumulated_size * 100.0 / overall_size); | |
557 | } | |
558 | if (threshold > get_hot_bb_threshold () | |
559 | || in_lto_p) | |
560 | { | |
561 | if (dump_file) | |
562 | fprintf (dump_file, "Threshold updated.\n"); | |
563 | set_hot_bb_threshold (threshold); | |
564 | } | |
565 | } | |
c3284718 | 566 | histogram.release (); |
08f835dc JH |
567 | free_alloc_pool (histogram_pool); |
568 | ||
569 | /* Produce speculative calls: we saved common traget from porfiling into | |
570 | e->common_target_id. Now, at link time, we can look up corresponding | |
571 | function node and produce speculative call. */ | |
572 | ||
573 | FOR_EACH_DEFINED_FUNCTION (n) | |
574 | { | |
575 | bool update = false; | |
576 | ||
577 | for (e = n->indirect_calls; e; e = e->next_callee) | |
578 | { | |
579 | if (n->count) | |
580 | nindirect++; | |
581 | if (e->indirect_info->common_target_id) | |
582 | { | |
583 | if (!node_map_initialized) | |
584 | init_node_map (false); | |
585 | node_map_initialized = true; | |
586 | ncommon++; | |
587 | n2 = find_func_by_profile_id (e->indirect_info->common_target_id); | |
588 | if (n2) | |
589 | { | |
590 | if (dump_file) | |
591 | { | |
592 | fprintf (dump_file, "Indirect call -> direct call from" | |
593 | " other module %s/%i => %s/%i, prob %3.2f\n", | |
fec39fa6 TS |
594 | xstrdup (n->name ()), n->order, |
595 | xstrdup (n2->name ()), n2->order, | |
08f835dc JH |
596 | e->indirect_info->common_target_probability |
597 | / (float)REG_BR_PROB_BASE); | |
598 | } | |
599 | if (e->indirect_info->common_target_probability | |
600 | < REG_BR_PROB_BASE / 2) | |
601 | { | |
602 | nuseless++; | |
603 | if (dump_file) | |
604 | fprintf (dump_file, | |
605 | "Not speculating: probability is too low.\n"); | |
606 | } | |
607 | else if (!cgraph_maybe_hot_edge_p (e)) | |
608 | { | |
609 | nuseless++; | |
610 | if (dump_file) | |
611 | fprintf (dump_file, | |
612 | "Not speculating: call is cold.\n"); | |
613 | } | |
614 | else if (cgraph_function_body_availability (n2) | |
615 | <= AVAIL_OVERWRITABLE | |
67348ccc | 616 | && symtab_can_be_discarded (n2)) |
08f835dc JH |
617 | { |
618 | nuseless++; | |
619 | if (dump_file) | |
620 | fprintf (dump_file, | |
621 | "Not speculating: target is overwritable " | |
622 | "and can be discarded.\n"); | |
623 | } | |
624 | else | |
625 | { | |
626 | /* Target may be overwritable, but profile says that | |
627 | control flow goes to this particular implementation | |
628 | of N2. Speculate on the local alias to allow inlining. | |
629 | */ | |
67348ccc | 630 | if (!symtab_can_be_discarded (n2)) |
5b79657a JH |
631 | { |
632 | cgraph_node *alias; | |
633 | alias = cgraph (symtab_nonoverwritable_alias | |
67348ccc | 634 | (n2)); |
5b79657a JH |
635 | if (alias) |
636 | n2 = alias; | |
637 | } | |
08f835dc JH |
638 | nconverted++; |
639 | cgraph_turn_edge_to_speculative | |
640 | (e, n2, | |
641 | apply_scale (e->count, | |
642 | e->indirect_info->common_target_probability), | |
643 | apply_scale (e->frequency, | |
644 | e->indirect_info->common_target_probability)); | |
645 | update = true; | |
646 | } | |
647 | } | |
648 | else | |
649 | { | |
650 | if (dump_file) | |
651 | fprintf (dump_file, "Function with profile-id %i not found.\n", | |
652 | e->indirect_info->common_target_id); | |
653 | nunknown++; | |
654 | } | |
655 | } | |
656 | } | |
657 | if (update) | |
658 | inline_update_overall_summary (n); | |
659 | } | |
660 | if (node_map_initialized) | |
661 | del_node_map (); | |
662 | if (dump_file && nindirect) | |
663 | fprintf (dump_file, | |
664 | "%i indirect calls trained.\n" | |
665 | "%i (%3.2f%%) have common target.\n" | |
666 | "%i (%3.2f%%) targets was not found.\n" | |
667 | "%i (%3.2f%%) speculations seems useless.\n" | |
668 | "%i (%3.2f%%) speculations produced.\n", | |
669 | nindirect, | |
670 | ncommon, ncommon * 100.0 / nindirect, | |
671 | nunknown, nunknown * 100.0 / nindirect, | |
672 | nuseless, nuseless * 100.0 / nindirect, | |
673 | nconverted, nconverted * 100.0 / nindirect); | |
674 | ||
675 | order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); | |
676 | order_pos = ipa_reverse_postorder (order); | |
677 | for (i = order_pos - 1; i >= 0; i--) | |
678 | { | |
679 | if (order[i]->local.local && ipa_propagate_frequency (order[i])) | |
680 | { | |
681 | for (e = order[i]->callees; e; e = e->next_callee) | |
67348ccc | 682 | if (e->callee->local.local && !e->callee->aux) |
08f835dc JH |
683 | { |
684 | something_changed = true; | |
67348ccc | 685 | e->callee->aux = (void *)1; |
08f835dc JH |
686 | } |
687 | } | |
67348ccc | 688 | order[i]->aux = NULL; |
08f835dc JH |
689 | } |
690 | ||
691 | while (something_changed) | |
692 | { | |
693 | something_changed = false; | |
694 | for (i = order_pos - 1; i >= 0; i--) | |
695 | { | |
67348ccc | 696 | if (order[i]->aux && ipa_propagate_frequency (order[i])) |
08f835dc JH |
697 | { |
698 | for (e = order[i]->callees; e; e = e->next_callee) | |
67348ccc | 699 | if (e->callee->local.local && !e->callee->aux) |
08f835dc JH |
700 | { |
701 | something_changed = true; | |
67348ccc | 702 | e->callee->aux = (void *)1; |
08f835dc JH |
703 | } |
704 | } | |
67348ccc | 705 | order[i]->aux = NULL; |
08f835dc JH |
706 | } |
707 | } | |
708 | free (order); | |
709 | return 0; | |
710 | } | |
711 | ||
712 | static bool | |
713 | gate_ipa_profile (void) | |
714 | { | |
715 | return flag_ipa_profile; | |
716 | } | |
717 | ||
718 | namespace { | |
719 | ||
720 | const pass_data pass_data_ipa_profile = | |
721 | { | |
722 | IPA_PASS, /* type */ | |
723 | "profile_estimate", /* name */ | |
724 | OPTGROUP_NONE, /* optinfo_flags */ | |
725 | true, /* has_gate */ | |
726 | true, /* has_execute */ | |
727 | TV_IPA_PROFILE, /* tv_id */ | |
728 | 0, /* properties_required */ | |
729 | 0, /* properties_provided */ | |
730 | 0, /* properties_destroyed */ | |
731 | 0, /* todo_flags_start */ | |
732 | 0, /* todo_flags_finish */ | |
733 | }; | |
734 | ||
735 | class pass_ipa_profile : public ipa_opt_pass_d | |
736 | { | |
737 | public: | |
c3284718 RS |
738 | pass_ipa_profile (gcc::context *ctxt) |
739 | : ipa_opt_pass_d (pass_data_ipa_profile, ctxt, | |
740 | ipa_profile_generate_summary, /* generate_summary */ | |
741 | ipa_profile_write_summary, /* write_summary */ | |
742 | ipa_profile_read_summary, /* read_summary */ | |
743 | NULL, /* write_optimization_summary */ | |
744 | NULL, /* read_optimization_summary */ | |
745 | NULL, /* stmt_fixup */ | |
746 | 0, /* function_transform_todo_flags_start */ | |
747 | NULL, /* function_transform */ | |
748 | NULL) /* variable_transform */ | |
08f835dc JH |
749 | {} |
750 | ||
751 | /* opt_pass methods: */ | |
752 | bool gate () { return gate_ipa_profile (); } | |
753 | unsigned int execute () { return ipa_profile (); } | |
754 | ||
755 | }; // class pass_ipa_profile | |
756 | ||
757 | } // anon namespace | |
758 | ||
759 | ipa_opt_pass_d * | |
760 | make_pass_ipa_profile (gcc::context *ctxt) | |
761 | { | |
762 | return new pass_ipa_profile (ctxt); | |
763 | } |