]>
Commit | Line | Data |
---|---|---|
08f835dc | 1 | /* Basic IPA optimizations based on profile. |
8d9254fc | 2 | Copyright (C) 2003-2020 Free Software Foundation, Inc. |
08f835dc JH |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
daf5c770 JH |
20 | /* ipa-profile pass implements the following analysis propagating profille |
21 | inter-procedurally. | |
22 | ||
23 | - Count histogram construction. This is a histogram analyzing how much | |
24 | time is spent executing statements with a given execution count read | |
1c5fd343 | 25 | from profile feedback. This histogram is complete only with LTO, |
daf5c770 JH |
26 | otherwise it contains information only about the current unit. |
27 | ||
daf5c770 JH |
28 | The information is used to set hot/cold thresholds. |
29 | - Next speculative indirect call resolution is performed: the local | |
30 | profile pass assigns profile-id to each function and provide us with a | |
31 | histogram specifying the most common target. We look up the callgraph | |
32 | node corresponding to the target and produce a speculative call. | |
33 | ||
34 | This call may or may not survive through IPA optimization based on decision | |
35 | of inliner. | |
36 | - Finally we propagate the following flags: unlikely executed, executed | |
37 | once, executed at startup and executed at exit. These flags are used to | |
026c3cfd | 38 | control code size/performance threshold and code placement (by producing |
daf5c770 | 39 | .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */ |
08f835dc JH |
40 | #include "config.h" |
41 | #include "system.h" | |
42 | #include "coretypes.h" | |
c7131fb2 | 43 | #include "backend.h" |
4d648807 | 44 | #include "tree.h" |
c7131fb2 | 45 | #include "gimple.h" |
957060b5 AM |
46 | #include "predict.h" |
47 | #include "alloc-pool.h" | |
48 | #include "tree-pass.h" | |
49 | #include "cgraph.h" | |
50 | #include "data-streamer.h" | |
5be5c238 | 51 | #include "gimple-iterator.h" |
08f835dc | 52 | #include "ipa-utils.h" |
08f835dc | 53 | #include "profile.h" |
08f835dc | 54 | #include "value-prof.h" |
08f835dc | 55 | #include "tree-inline.h" |
dd912cb8 | 56 | #include "symbol-summary.h" |
8bc5448f | 57 | #include "tree-vrp.h" |
c582198b | 58 | #include "ipa-prop.h" |
27d020cf | 59 | #include "ipa-fnsummary.h" |
08f835dc JH |
60 | |
61 | /* Entry in the histogram. */ | |
62 | ||
63 | struct histogram_entry | |
64 | { | |
65 | gcov_type count; | |
66 | int time; | |
67 | int size; | |
68 | }; | |
69 | ||
70 | /* Histogram of profile values. | |
71 | The histogram is represented as an ordered vector of entries allocated via | |
72 | histogram_pool. During construction a separate hashtable is kept to lookup | |
73 | duplicate entries. */ | |
74 | ||
75 | vec<histogram_entry *> histogram; | |
fcb87c50 | 76 | static object_allocator<histogram_entry> histogram_pool ("IPA histogram"); |
08f835dc JH |
77 | |
78 | /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ | |
79 | ||
8d67ee55 | 80 | struct histogram_hash : nofree_ptr_hash <histogram_entry> |
08f835dc | 81 | { |
67f58944 TS |
82 | static inline hashval_t hash (const histogram_entry *); |
83 | static inline int equal (const histogram_entry *, const histogram_entry *); | |
08f835dc JH |
84 | }; |
85 | ||
86 | inline hashval_t | |
87 | histogram_hash::hash (const histogram_entry *val) | |
88 | { | |
89 | return val->count; | |
90 | } | |
91 | ||
92 | inline int | |
93 | histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) | |
94 | { | |
95 | return val->count == val2->count; | |
96 | } | |
97 | ||
98 | /* Account TIME and SIZE executed COUNT times into HISTOGRAM. | |
99 | HASHTABLE is the on-side hash kept to avoid duplicates. */ | |
100 | ||
101 | static void | |
c203e8a7 | 102 | account_time_size (hash_table<histogram_hash> *hashtable, |
08f835dc JH |
103 | vec<histogram_entry *> &histogram, |
104 | gcov_type count, int time, int size) | |
105 | { | |
106 | histogram_entry key = {count, 0, 0}; | |
c203e8a7 | 107 | histogram_entry **val = hashtable->find_slot (&key, INSERT); |
08f835dc JH |
108 | |
109 | if (!*val) | |
110 | { | |
d7809518 | 111 | *val = histogram_pool.allocate (); |
08f835dc JH |
112 | **val = key; |
113 | histogram.safe_push (*val); | |
114 | } | |
115 | (*val)->time += time; | |
116 | (*val)->size += size; | |
117 | } | |
118 | ||
119 | int | |
120 | cmp_counts (const void *v1, const void *v2) | |
121 | { | |
122 | const histogram_entry *h1 = *(const histogram_entry * const *)v1; | |
123 | const histogram_entry *h2 = *(const histogram_entry * const *)v2; | |
124 | if (h1->count < h2->count) | |
125 | return 1; | |
126 | if (h1->count > h2->count) | |
127 | return -1; | |
128 | return 0; | |
129 | } | |
130 | ||
131 | /* Dump HISTOGRAM to FILE. */ | |
132 | ||
133 | static void | |
134 | dump_histogram (FILE *file, vec<histogram_entry *> histogram) | |
135 | { | |
136 | unsigned int i; | |
845bb366 JH |
137 | gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, |
138 | overall_size = 0; | |
08f835dc JH |
139 | |
140 | fprintf (dump_file, "Histogram:\n"); | |
141 | for (i = 0; i < histogram.length (); i++) | |
142 | { | |
143 | overall_time += histogram[i]->count * histogram[i]->time; | |
144 | overall_size += histogram[i]->size; | |
145 | } | |
146 | if (!overall_time) | |
147 | overall_time = 1; | |
148 | if (!overall_size) | |
149 | overall_size = 1; | |
150 | for (i = 0; i < histogram.length (); i++) | |
151 | { | |
152 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
153 | cumulated_size += histogram[i]->size; | |
16998094 | 154 | fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n", |
a9243bfc | 155 | (int64_t) histogram[i]->count, |
08f835dc JH |
156 | histogram[i]->time, |
157 | cumulated_time * 100.0 / overall_time, | |
158 | histogram[i]->size, | |
159 | cumulated_size * 100.0 / overall_size); | |
160 | } | |
161 | } | |
162 | ||
f1ba88b1 XHL |
163 | /* Structure containing speculative target information from profile. */ |
164 | ||
165 | struct speculative_call_target | |
166 | { | |
167 | speculative_call_target (unsigned int id = 0, int prob = 0) | |
168 | : target_id (id), target_probability (prob) | |
169 | { | |
170 | } | |
171 | ||
172 | /* Profile_id of target obtained from profile. */ | |
173 | unsigned int target_id; | |
174 | /* Probability that call will land in function with target_id. */ | |
175 | unsigned int target_probability; | |
176 | }; | |
177 | ||
178 | class speculative_call_summary | |
179 | { | |
180 | public: | |
181 | speculative_call_summary () : speculative_call_targets () | |
182 | {} | |
183 | ||
184 | auto_vec<speculative_call_target> speculative_call_targets; | |
185 | ||
186 | void dump (FILE *f); | |
187 | ||
188 | }; | |
189 | ||
190 | /* Class to manage call summaries. */ | |
191 | ||
192 | class ipa_profile_call_summaries | |
193 | : public call_summary<speculative_call_summary *> | |
194 | { | |
195 | public: | |
196 | ipa_profile_call_summaries (symbol_table *table) | |
197 | : call_summary<speculative_call_summary *> (table) | |
198 | {} | |
199 | ||
200 | /* Duplicate info when an edge is cloned. */ | |
201 | virtual void duplicate (cgraph_edge *, cgraph_edge *, | |
202 | speculative_call_summary *old_sum, | |
203 | speculative_call_summary *new_sum); | |
204 | }; | |
205 | ||
206 | static ipa_profile_call_summaries *call_sums = NULL; | |
207 | ||
208 | /* Dump all information in speculative call summary to F. */ | |
209 | ||
210 | void | |
211 | speculative_call_summary::dump (FILE *f) | |
212 | { | |
213 | cgraph_node *n2; | |
214 | ||
215 | unsigned spec_count = speculative_call_targets.length (); | |
216 | for (unsigned i = 0; i < spec_count; i++) | |
217 | { | |
218 | speculative_call_target item = speculative_call_targets[i]; | |
219 | n2 = find_func_by_profile_id (item.target_id); | |
220 | if (n2) | |
221 | fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i, | |
222 | n2->dump_name (), | |
223 | item.target_probability / (float) REG_BR_PROB_BASE); | |
224 | else | |
225 | fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i, | |
226 | item.target_id, | |
227 | item.target_probability / (float) REG_BR_PROB_BASE); | |
228 | } | |
229 | } | |
230 | ||
231 | /* Duplicate info when an edge is cloned. */ | |
232 | ||
233 | void | |
234 | ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, | |
235 | speculative_call_summary *old_sum, | |
236 | speculative_call_summary *new_sum) | |
237 | { | |
238 | if (!old_sum) | |
239 | return; | |
240 | ||
241 | unsigned old_count = old_sum->speculative_call_targets.length (); | |
242 | if (!old_count) | |
243 | return; | |
244 | ||
245 | new_sum->speculative_call_targets.reserve_exact (old_count); | |
246 | new_sum->speculative_call_targets.quick_grow_cleared (old_count); | |
247 | ||
248 | for (unsigned i = 0; i < old_count; i++) | |
249 | { | |
250 | new_sum->speculative_call_targets[i] | |
251 | = old_sum->speculative_call_targets[i]; | |
252 | } | |
253 | } | |
254 | ||
255 | /* Collect histogram and speculative target summaries from CFG profiles. */ | |
08f835dc JH |
256 | |
257 | static void | |
258 | ipa_profile_generate_summary (void) | |
259 | { | |
260 | struct cgraph_node *node; | |
261 | gimple_stmt_iterator gsi; | |
08f835dc JH |
262 | basic_block bb; |
263 | ||
c203e8a7 | 264 | hash_table<histogram_hash> hashtable (10); |
f1ba88b1 XHL |
265 | |
266 | gcc_checking_assert (!call_sums); | |
267 | call_sums = new ipa_profile_call_summaries (symtab); | |
268 | ||
08f835dc | 269 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
845bb366 JH |
270 | if (ENTRY_BLOCK_PTR_FOR_FN |
271 | (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) | |
e7a74006 JH |
272 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
273 | { | |
274 | int time = 0; | |
275 | int size = 0; | |
276 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
277 | { | |
278 | gimple *stmt = gsi_stmt (gsi); | |
279 | if (gimple_code (stmt) == GIMPLE_CALL | |
280 | && !gimple_call_fndecl (stmt)) | |
281 | { | |
282 | histogram_value h; | |
283 | h = gimple_histogram_value_of_type | |
284 | (DECL_STRUCT_FUNCTION (node->decl), | |
285 | stmt, HIST_TYPE_INDIR_CALL); | |
286 | /* No need to do sanity check: gimple_ic_transform already | |
287 | takes away bad histograms. */ | |
288 | if (h) | |
289 | { | |
92d41717 | 290 | gcov_type val, count, all; |
f1ba88b1 XHL |
291 | struct cgraph_edge *e = node->get_edge (stmt); |
292 | if (e && !e->indirect_unknown_callee) | |
293 | continue; | |
294 | ||
295 | speculative_call_summary *csum | |
296 | = call_sums->get_create (e); | |
297 | ||
298 | for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++) | |
e7a74006 | 299 | { |
f1ba88b1 XHL |
300 | if (!get_nth_most_common_value (NULL, "indirect call", |
301 | h, &val, &count, &all, | |
302 | j)) | |
e7a74006 | 303 | continue; |
92d41717 | 304 | |
845bb366 | 305 | if (val == 0 || count == 0) |
f1ba88b1 XHL |
306 | continue; |
307 | ||
845bb366 | 308 | if (count > all) |
e7a74006 JH |
309 | { |
310 | if (dump_file) | |
f1ba88b1 XHL |
311 | fprintf (dump_file, |
312 | "Probability capped to 1\n"); | |
845bb366 | 313 | count = all; |
e7a74006 | 314 | } |
845bb366 JH |
315 | speculative_call_target item ( |
316 | val, GCOV_COMPUTE_SCALE (count, all)); | |
f1ba88b1 | 317 | csum->speculative_call_targets.safe_push (item); |
e7a74006 | 318 | } |
f1ba88b1 | 319 | |
845bb366 JH |
320 | gimple_remove_histogram_value |
321 | (DECL_STRUCT_FUNCTION (node->decl), stmt, h); | |
e7a74006 JH |
322 | } |
323 | } | |
324 | time += estimate_num_insns (stmt, &eni_time_weights); | |
325 | size += estimate_num_insns (stmt, &eni_size_weights); | |
326 | } | |
327 | if (bb->count.ipa_p () && bb->count.initialized_p ()) | |
845bb366 JH |
328 | account_time_size (&hashtable, histogram, |
329 | bb->count.ipa ().to_gcov_type (), | |
e7a74006 JH |
330 | time, size); |
331 | } | |
08f835dc JH |
332 | histogram.qsort (cmp_counts); |
333 | } | |
334 | ||
f1ba88b1 XHL |
335 | /* Serialize the speculative summary info for LTO. */ |
336 | ||
337 | static void | |
338 | ipa_profile_write_edge_summary (lto_simple_output_block *ob, | |
339 | speculative_call_summary *csum) | |
340 | { | |
341 | unsigned len = 0; | |
342 | ||
343 | len = csum->speculative_call_targets.length (); | |
344 | ||
345 | gcc_assert (len <= GCOV_TOPN_VALUES); | |
346 | ||
347 | streamer_write_hwi_stream (ob->main_stream, len); | |
348 | ||
349 | if (len) | |
350 | { | |
351 | unsigned spec_count = csum->speculative_call_targets.length (); | |
352 | for (unsigned i = 0; i < spec_count; i++) | |
353 | { | |
354 | speculative_call_target item = csum->speculative_call_targets[i]; | |
355 | gcc_assert (item.target_id); | |
356 | streamer_write_hwi_stream (ob->main_stream, item.target_id); | |
357 | streamer_write_hwi_stream (ob->main_stream, item.target_probability); | |
358 | } | |
359 | } | |
360 | } | |
361 | ||
08f835dc JH |
362 | /* Serialize the ipa info for lto. */ |
363 | ||
364 | static void | |
365 | ipa_profile_write_summary (void) | |
366 | { | |
367 | struct lto_simple_output_block *ob | |
368 | = lto_create_simple_output_block (LTO_section_ipa_profile); | |
369 | unsigned int i; | |
370 | ||
c3284718 | 371 | streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); |
08f835dc JH |
372 | for (i = 0; i < histogram.length (); i++) |
373 | { | |
374 | streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); | |
375 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); | |
376 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); | |
377 | } | |
f1ba88b1 XHL |
378 | |
379 | if (!call_sums) | |
380 | return; | |
381 | ||
382 | /* Serialize speculative targets information. */ | |
383 | unsigned int count = 0; | |
384 | lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; | |
385 | lto_symtab_encoder_iterator lsei; | |
386 | cgraph_node *node; | |
387 | ||
388 | for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); | |
389 | lsei_next_function_in_partition (&lsei)) | |
390 | { | |
391 | node = lsei_cgraph_node (lsei); | |
392 | if (node->definition && node->has_gimple_body_p () | |
393 | && node->indirect_calls) | |
394 | count++; | |
395 | } | |
396 | ||
397 | streamer_write_uhwi_stream (ob->main_stream, count); | |
398 | ||
399 | /* Process all of the functions. */ | |
400 | for (lsei = lsei_start_function_in_partition (encoder); | |
401 | !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei)) | |
402 | { | |
403 | cgraph_node *node = lsei_cgraph_node (lsei); | |
404 | if (node->definition && node->has_gimple_body_p () | |
405 | && node->indirect_calls) | |
406 | { | |
407 | int node_ref = lto_symtab_encoder_encode (encoder, node); | |
408 | streamer_write_uhwi_stream (ob->main_stream, node_ref); | |
409 | ||
410 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) | |
411 | { | |
412 | speculative_call_summary *csum = call_sums->get_create (e); | |
413 | ipa_profile_write_edge_summary (ob, csum); | |
414 | } | |
415 | } | |
416 | } | |
417 | ||
08f835dc JH |
418 | lto_destroy_simple_output_block (ob); |
419 | } | |
420 | ||
f1ba88b1 XHL |
421 | /* Dump all profile summary data for all cgraph nodes and edges to file F. */ |
422 | ||
423 | static void | |
424 | ipa_profile_dump_all_summaries (FILE *f) | |
425 | { | |
426 | fprintf (dump_file, | |
427 | "\n========== IPA-profile speculative targets: ==========\n"); | |
428 | cgraph_node *node; | |
429 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) | |
430 | { | |
431 | fprintf (f, "\nSummary for node %s:\n", node->dump_name ()); | |
432 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) | |
433 | { | |
434 | fprintf (f, " Summary for %s of indirect edge %d:\n", | |
435 | e->caller->dump_name (), e->lto_stmt_uid); | |
436 | speculative_call_summary *csum = call_sums->get_create (e); | |
437 | csum->dump (f); | |
438 | } | |
439 | } | |
440 | fprintf (f, "\n\n"); | |
441 | } | |
442 | ||
443 | /* Read speculative targets information about edge for LTO WPA. */ | |
444 | ||
445 | static void | |
446 | ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge) | |
447 | { | |
448 | unsigned i, len; | |
449 | ||
450 | len = streamer_read_hwi (ib); | |
451 | gcc_assert (len <= GCOV_TOPN_VALUES); | |
452 | ||
453 | speculative_call_summary *csum = call_sums->get_create (edge); | |
454 | ||
455 | for (i = 0; i < len; i++) | |
456 | { | |
84a3effa ML |
457 | unsigned int target_id = streamer_read_hwi (ib); |
458 | int target_probability = streamer_read_hwi (ib); | |
459 | speculative_call_target item (target_id, target_probability); | |
f1ba88b1 XHL |
460 | csum->speculative_call_targets.safe_push (item); |
461 | } | |
462 | } | |
463 | ||
464 | /* Read profile speculative targets section information for LTO WPA. */ | |
465 | ||
466 | static void | |
467 | ipa_profile_read_summary_section (struct lto_file_decl_data *file_data, | |
468 | class lto_input_block *ib) | |
469 | { | |
470 | if (!ib) | |
471 | return; | |
472 | ||
473 | lto_symtab_encoder_t encoder = file_data->symtab_node_encoder; | |
474 | ||
475 | unsigned int count = streamer_read_uhwi (ib); | |
476 | ||
477 | unsigned int i; | |
478 | unsigned int index; | |
479 | cgraph_node * node; | |
480 | ||
481 | for (i = 0; i < count; i++) | |
482 | { | |
483 | index = streamer_read_uhwi (ib); | |
484 | encoder = file_data->symtab_node_encoder; | |
485 | node | |
486 | = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index)); | |
487 | ||
488 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) | |
489 | ipa_profile_read_edge_summary (ib, e); | |
490 | } | |
491 | } | |
492 | ||
493 | /* Deserialize the IPA histogram and speculative targets summary info for LTO. | |
494 | */ | |
08f835dc JH |
495 | |
496 | static void | |
497 | ipa_profile_read_summary (void) | |
498 | { | |
499 | struct lto_file_decl_data ** file_data_vec | |
500 | = lto_get_file_decl_data (); | |
501 | struct lto_file_decl_data * file_data; | |
08f835dc JH |
502 | int j = 0; |
503 | ||
c203e8a7 | 504 | hash_table<histogram_hash> hashtable (10); |
08f835dc | 505 | |
f1ba88b1 XHL |
506 | gcc_checking_assert (!call_sums); |
507 | call_sums = new ipa_profile_call_summaries (symtab); | |
508 | ||
08f835dc JH |
509 | while ((file_data = file_data_vec[j++])) |
510 | { | |
511 | const char *data; | |
512 | size_t len; | |
99b1c316 | 513 | class lto_input_block *ib |
08f835dc JH |
514 | = lto_create_simple_input_block (file_data, |
515 | LTO_section_ipa_profile, | |
516 | &data, &len); | |
517 | if (ib) | |
518 | { | |
519 | unsigned int num = streamer_read_uhwi (ib); | |
520 | unsigned int n; | |
521 | for (n = 0; n < num; n++) | |
522 | { | |
523 | gcov_type count = streamer_read_gcov_count (ib); | |
524 | int time = streamer_read_uhwi (ib); | |
525 | int size = streamer_read_uhwi (ib); | |
c203e8a7 | 526 | account_time_size (&hashtable, histogram, |
08f835dc JH |
527 | count, time, size); |
528 | } | |
f1ba88b1 XHL |
529 | |
530 | ipa_profile_read_summary_section (file_data, ib); | |
531 | ||
08f835dc JH |
532 | lto_destroy_simple_input_block (file_data, |
533 | LTO_section_ipa_profile, | |
534 | ib, data, len); | |
535 | } | |
536 | } | |
08f835dc JH |
537 | histogram.qsort (cmp_counts); |
538 | } | |
539 | ||
540 | /* Data used by ipa_propagate_frequency. */ | |
541 | ||
542 | struct ipa_propagate_frequency_data | |
543 | { | |
1ede94c5 | 544 | cgraph_node *function_symbol; |
08f835dc JH |
545 | bool maybe_unlikely_executed; |
546 | bool maybe_executed_once; | |
547 | bool only_called_at_startup; | |
548 | bool only_called_at_exit; | |
549 | }; | |
550 | ||
551 | /* Worker for ipa_propagate_frequency_1. */ | |
552 | ||
553 | static bool | |
554 | ipa_propagate_frequency_1 (struct cgraph_node *node, void *data) | |
555 | { | |
556 | struct ipa_propagate_frequency_data *d; | |
557 | struct cgraph_edge *edge; | |
558 | ||
559 | d = (struct ipa_propagate_frequency_data *)data; | |
560 | for (edge = node->callers; | |
561 | edge && (d->maybe_unlikely_executed || d->maybe_executed_once | |
562 | || d->only_called_at_startup || d->only_called_at_exit); | |
563 | edge = edge->next_caller) | |
564 | { | |
1ede94c5 | 565 | if (edge->caller != d->function_symbol) |
08f835dc JH |
566 | { |
567 | d->only_called_at_startup &= edge->caller->only_called_at_startup; | |
568 | /* It makes sense to put main() together with the static constructors. | |
569 | It will be executed for sure, but rest of functions called from | |
570 | main are definitely not at startup only. */ | |
67348ccc | 571 | if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) |
08f835dc JH |
572 | d->only_called_at_startup = 0; |
573 | d->only_called_at_exit &= edge->caller->only_called_at_exit; | |
574 | } | |
daf5c770 JH |
575 | |
576 | /* When profile feedback is available, do not try to propagate too hard; | |
577 | counts are already good guide on function frequencies and roundoff | |
578 | errors can make us to push function into unlikely section even when | |
579 | it is executed by the train run. Transfer the function only if all | |
580 | callers are unlikely executed. */ | |
1ede94c5 | 581 | if (profile_info |
35d93d1d | 582 | && !(edge->callee->count.ipa () == profile_count::zero ()) |
daf5c770 | 583 | && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED |
a62bfab5 ML |
584 | || (edge->caller->inlined_to |
585 | && edge->caller->inlined_to->frequency | |
daf5c770 JH |
586 | != NODE_FREQUENCY_UNLIKELY_EXECUTED))) |
587 | d->maybe_unlikely_executed = false; | |
35d93d1d JH |
588 | if (edge->count.ipa ().initialized_p () |
589 | && !edge->count.ipa ().nonzero_p ()) | |
08f835dc JH |
590 | continue; |
591 | switch (edge->caller->frequency) | |
592 | { | |
593 | case NODE_FREQUENCY_UNLIKELY_EXECUTED: | |
594 | break; | |
595 | case NODE_FREQUENCY_EXECUTED_ONCE: | |
56f62793 ML |
596 | { |
597 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
598 | fprintf (dump_file, " Called by %s that is executed once\n", | |
3629ff8a | 599 | edge->caller->dump_name ()); |
56f62793 ML |
600 | d->maybe_unlikely_executed = false; |
601 | ipa_call_summary *s = ipa_call_summaries->get (edge); | |
602 | if (s != NULL && s->loop_depth) | |
603 | { | |
604 | d->maybe_executed_once = false; | |
605 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
606 | fprintf (dump_file, " Called in loop\n"); | |
607 | } | |
608 | break; | |
609 | } | |
08f835dc JH |
610 | case NODE_FREQUENCY_HOT: |
611 | case NODE_FREQUENCY_NORMAL: | |
612 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
613 | fprintf (dump_file, " Called by %s that is normal or hot\n", | |
3629ff8a | 614 | edge->caller->dump_name ()); |
08f835dc JH |
615 | d->maybe_unlikely_executed = false; |
616 | d->maybe_executed_once = false; | |
617 | break; | |
618 | } | |
619 | } | |
620 | return edge != NULL; | |
621 | } | |
622 | ||
daf5c770 JH |
623 | /* Return ture if NODE contains hot calls. */ |
624 | ||
625 | bool | |
626 | contains_hot_call_p (struct cgraph_node *node) | |
627 | { | |
628 | struct cgraph_edge *e; | |
629 | for (e = node->callees; e; e = e->next_callee) | |
3dafb85c | 630 | if (e->maybe_hot_p ()) |
daf5c770 JH |
631 | return true; |
632 | else if (!e->inline_failed | |
633 | && contains_hot_call_p (e->callee)) | |
634 | return true; | |
635 | for (e = node->indirect_calls; e; e = e->next_callee) | |
3dafb85c | 636 | if (e->maybe_hot_p ()) |
daf5c770 JH |
637 | return true; |
638 | return false; | |
639 | } | |
640 | ||
08f835dc JH |
641 | /* See if the frequency of NODE can be updated based on frequencies of its |
642 | callers. */ | |
643 | bool | |
644 | ipa_propagate_frequency (struct cgraph_node *node) | |
645 | { | |
1ede94c5 | 646 | struct ipa_propagate_frequency_data d = {node, true, true, true, true}; |
08f835dc JH |
647 | bool changed = false; |
648 | ||
67914693 | 649 | /* We cannot propagate anything useful about externally visible functions |
08f835dc | 650 | nor about virtuals. */ |
87f94429 | 651 | if (!node->local |
67348ccc | 652 | || node->alias |
2bf86c84 JH |
653 | || (opt_for_fn (node->decl, flag_devirtualize) |
654 | && DECL_VIRTUAL_P (node->decl))) | |
08f835dc | 655 | return false; |
67348ccc | 656 | gcc_assert (node->analyzed); |
08f835dc | 657 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3629ff8a | 658 | fprintf (dump_file, "Processing frequency %s\n", node->dump_name ()); |
08f835dc | 659 | |
1ede94c5 JH |
660 | node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d, |
661 | true); | |
08f835dc JH |
662 | |
663 | if ((d.only_called_at_startup && !d.only_called_at_exit) | |
664 | && !node->only_called_at_startup) | |
665 | { | |
666 | node->only_called_at_startup = true; | |
667 | if (dump_file) | |
668 | fprintf (dump_file, "Node %s promoted to only called at startup.\n", | |
3629ff8a | 669 | node->dump_name ()); |
08f835dc JH |
670 | changed = true; |
671 | } | |
672 | if ((d.only_called_at_exit && !d.only_called_at_startup) | |
673 | && !node->only_called_at_exit) | |
674 | { | |
675 | node->only_called_at_exit = true; | |
676 | if (dump_file) | |
677 | fprintf (dump_file, "Node %s promoted to only called at exit.\n", | |
3629ff8a | 678 | node->dump_name ()); |
08f835dc JH |
679 | changed = true; |
680 | } | |
daf5c770 JH |
681 | |
682 | /* With profile we can decide on hot/normal based on count. */ | |
1bad9c18 | 683 | if (node->count. ipa().initialized_p ()) |
daf5c770 JH |
684 | { |
685 | bool hot = false; | |
1bad9c18 JH |
686 | if (!(node->count. ipa() == profile_count::zero ()) |
687 | && node->count. ipa() >= get_hot_bb_threshold ()) | |
daf5c770 JH |
688 | hot = true; |
689 | if (!hot) | |
690 | hot |= contains_hot_call_p (node); | |
691 | if (hot) | |
692 | { | |
693 | if (node->frequency != NODE_FREQUENCY_HOT) | |
694 | { | |
695 | if (dump_file) | |
696 | fprintf (dump_file, "Node %s promoted to hot.\n", | |
3629ff8a | 697 | node->dump_name ()); |
daf5c770 JH |
698 | node->frequency = NODE_FREQUENCY_HOT; |
699 | return true; | |
700 | } | |
701 | return false; | |
702 | } | |
703 | else if (node->frequency == NODE_FREQUENCY_HOT) | |
704 | { | |
705 | if (dump_file) | |
706 | fprintf (dump_file, "Node %s reduced to normal.\n", | |
3629ff8a | 707 | node->dump_name ()); |
daf5c770 JH |
708 | node->frequency = NODE_FREQUENCY_NORMAL; |
709 | changed = true; | |
710 | } | |
711 | } | |
08f835dc JH |
712 | /* These come either from profile or user hints; never update them. */ |
713 | if (node->frequency == NODE_FREQUENCY_HOT | |
714 | || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
715 | return changed; | |
716 | if (d.maybe_unlikely_executed) | |
717 | { | |
718 | node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
719 | if (dump_file) | |
720 | fprintf (dump_file, "Node %s promoted to unlikely executed.\n", | |
3629ff8a | 721 | node->dump_name ()); |
08f835dc JH |
722 | changed = true; |
723 | } | |
724 | else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) | |
725 | { | |
726 | node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | |
727 | if (dump_file) | |
728 | fprintf (dump_file, "Node %s promoted to executed once.\n", | |
3629ff8a | 729 | node->dump_name ()); |
08f835dc JH |
730 | changed = true; |
731 | } | |
732 | return changed; | |
733 | } | |
734 | ||
d200a49f JH |
735 | /* Check that number of arguments of N agrees with E. |
736 | Be conservative when summaries are not present. */ | |
737 | ||
738 | static bool | |
739 | check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) | |
740 | { | |
741 | if (!ipa_node_params_sum || !ipa_edge_args_sum) | |
742 | return true; | |
743 | class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ()); | |
744 | if (!info) | |
745 | return true; | |
746 | ipa_edge_args *e_info = IPA_EDGE_REF (e); | |
d772e360 | 747 | if (!e_info) |
d200a49f JH |
748 | return true; |
749 | if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info) | |
750 | && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info) | |
751 | || !stdarg_p (TREE_TYPE (n->decl)))) | |
752 | return false; | |
753 | return true; | |
754 | } | |
755 | ||
08f835dc JH |
756 | /* Simple ipa profile pass propagating frequencies across the callgraph. */ |
757 | ||
758 | static unsigned int | |
759 | ipa_profile (void) | |
760 | { | |
761 | struct cgraph_node **order; | |
762 | struct cgraph_edge *e; | |
763 | int order_pos; | |
764 | bool something_changed = false; | |
765 | int i; | |
766 | gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; | |
767 | struct cgraph_node *n,*n2; | |
768 | int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0; | |
95d81ba5 | 769 | int nmismatch = 0, nimpossible = 0; |
08f835dc | 770 | bool node_map_initialized = false; |
f1ba88b1 | 771 | gcov_type threshold; |
08f835dc JH |
772 | |
773 | if (dump_file) | |
774 | dump_histogram (dump_file, histogram); | |
775 | for (i = 0; i < (int)histogram.length (); i++) | |
776 | { | |
777 | overall_time += histogram[i]->count * histogram[i]->time; | |
778 | overall_size += histogram[i]->size; | |
779 | } | |
f1ba88b1 | 780 | threshold = 0; |
08f835dc JH |
781 | if (overall_time) |
782 | { | |
08f835dc | 783 | gcc_assert (overall_size); |
08f835dc | 784 | |
028d4092 | 785 | cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000; |
08f835dc JH |
786 | for (i = 0; cumulated < cutoff; i++) |
787 | { | |
788 | cumulated += histogram[i]->count * histogram[i]->time; | |
789 | threshold = histogram[i]->count; | |
790 | } | |
791 | if (!threshold) | |
792 | threshold = 1; | |
793 | if (dump_file) | |
794 | { | |
795 | gcov_type cumulated_time = 0, cumulated_size = 0; | |
796 | ||
797 | for (i = 0; | |
798 | i < (int)histogram.length () && histogram[i]->count >= threshold; | |
799 | i++) | |
800 | { | |
801 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
802 | cumulated_size += histogram[i]->size; | |
803 | } | |
16998094 | 804 | fprintf (dump_file, "Determined min count: %" PRId64 |
08f835dc | 805 | " Time:%3.2f%% Size:%3.2f%%\n", |
a9243bfc | 806 | (int64_t)threshold, |
08f835dc JH |
807 | cumulated_time * 100.0 / overall_time, |
808 | cumulated_size * 100.0 / overall_size); | |
809 | } | |
512cc015 | 810 | |
e53f77c6 | 811 | if (in_lto_p) |
08f835dc JH |
812 | { |
813 | if (dump_file) | |
e53f77c6 | 814 | fprintf (dump_file, "Setting hotness threshold in LTO mode.\n"); |
08f835dc JH |
815 | set_hot_bb_threshold (threshold); |
816 | } | |
817 | } | |
c3284718 | 818 | histogram.release (); |
d7809518 | 819 | histogram_pool.release (); |
08f835dc | 820 | |
f1ba88b1 XHL |
821 | /* Produce speculative calls: we saved common target from profiling into |
822 | e->target_id. Now, at link time, we can look up corresponding | |
08f835dc JH |
823 | function node and produce speculative call. */ |
824 | ||
f1ba88b1 XHL |
825 | gcc_checking_assert (call_sums); |
826 | ||
827 | if (dump_file) | |
828 | { | |
829 | if (!node_map_initialized) | |
830 | init_node_map (false); | |
831 | node_map_initialized = true; | |
832 | ||
833 | ipa_profile_dump_all_summaries (dump_file); | |
834 | } | |
835 | ||
08f835dc JH |
836 | FOR_EACH_DEFINED_FUNCTION (n) |
837 | { | |
838 | bool update = false; | |
839 | ||
1ede94c5 JH |
840 | if (!opt_for_fn (n->decl, flag_ipa_profile)) |
841 | continue; | |
842 | ||
08f835dc JH |
843 | for (e = n->indirect_calls; e; e = e->next_callee) |
844 | { | |
3995f3a2 | 845 | if (n->count.initialized_p ()) |
08f835dc | 846 | nindirect++; |
f1ba88b1 XHL |
847 | |
848 | speculative_call_summary *csum = call_sums->get_create (e); | |
849 | unsigned spec_count = csum->speculative_call_targets.length (); | |
850 | if (spec_count) | |
08f835dc JH |
851 | { |
852 | if (!node_map_initialized) | |
f1ba88b1 | 853 | init_node_map (false); |
08f835dc JH |
854 | node_map_initialized = true; |
855 | ncommon++; | |
f1ba88b1 XHL |
856 | |
857 | if (in_lto_p) | |
858 | { | |
859 | if (dump_file) | |
860 | { | |
861 | fprintf (dump_file, | |
862 | "Updating hotness threshold in LTO mode.\n"); | |
863 | fprintf (dump_file, "Updated min count: %" PRId64 "\n", | |
864 | (int64_t) threshold / spec_count); | |
865 | } | |
866 | set_hot_bb_threshold (threshold / spec_count); | |
867 | } | |
868 | ||
869 | unsigned speculative_id = 0; | |
845bb366 | 870 | profile_count orig = e->count; |
f1ba88b1 | 871 | for (unsigned i = 0; i < spec_count; i++) |
08f835dc | 872 | { |
bf6fc129 JH |
873 | speculative_call_target item |
874 | = csum->speculative_call_targets[i]; | |
875 | n2 = find_func_by_profile_id (item.target_id); | |
876 | if (n2) | |
95d81ba5 | 877 | { |
95d81ba5 | 878 | if (dump_file) |
bf6fc129 JH |
879 | { |
880 | fprintf (dump_file, | |
881 | "Indirect call -> direct call from" | |
882 | " other module %s => %s, prob %3.2f\n", | |
883 | n->dump_name (), | |
884 | n2->dump_name (), | |
885 | item.target_probability | |
886 | / (float) REG_BR_PROB_BASE); | |
887 | } | |
845bb366 JH |
888 | if (item.target_probability |
889 | < REG_BR_PROB_BASE / GCOV_TOPN_VALUES / 2) | |
bf6fc129 JH |
890 | { |
891 | nuseless++; | |
892 | if (dump_file) | |
893 | fprintf (dump_file, | |
894 | "Not speculating: " | |
895 | "probability is too low.\n"); | |
896 | } | |
897 | else if (!e->maybe_hot_p ()) | |
898 | { | |
899 | nuseless++; | |
900 | if (dump_file) | |
901 | fprintf (dump_file, | |
902 | "Not speculating: call is cold.\n"); | |
903 | } | |
904 | else if (n2->get_availability () <= AVAIL_INTERPOSABLE | |
905 | && n2->can_be_discarded_p ()) | |
906 | { | |
907 | nuseless++; | |
908 | if (dump_file) | |
909 | fprintf (dump_file, | |
910 | "Not speculating: target is overwritable " | |
911 | "and can be discarded.\n"); | |
912 | } | |
913 | else if (!check_argument_count (n2, e)) | |
914 | { | |
915 | nmismatch++; | |
916 | if (dump_file) | |
917 | fprintf (dump_file, | |
918 | "Not speculating: " | |
919 | "parameter count mismatch\n"); | |
920 | } | |
921 | else if (e->indirect_info->polymorphic | |
922 | && !opt_for_fn (n->decl, flag_devirtualize) | |
923 | && !possible_polymorphic_call_target_p (e, n2)) | |
924 | { | |
925 | nimpossible++; | |
926 | if (dump_file) | |
927 | fprintf (dump_file, | |
928 | "Not speculating: " | |
929 | "function is not in the polymorphic " | |
930 | "call target list\n"); | |
931 | } | |
932 | else | |
933 | { | |
934 | /* Target may be overwritable, but profile says that | |
935 | control flow goes to this particular implementation | |
936 | of N2. Speculate on the local alias to allow | |
937 | inlining. */ | |
938 | if (!n2->can_be_discarded_p ()) | |
939 | { | |
940 | cgraph_node *alias; | |
941 | alias = dyn_cast<cgraph_node *> | |
942 | (n2->noninterposable_alias ()); | |
943 | if (alias) | |
944 | n2 = alias; | |
945 | } | |
946 | nconverted++; | |
845bb366 JH |
947 | profile_probability prob |
948 | = profile_probability::from_reg_br_prob_base | |
949 | (item.target_probability).adjusted (); | |
bf6fc129 | 950 | e->make_speculative (n2, |
845bb366 JH |
951 | orig.apply_probability (prob), |
952 | speculative_id); | |
bf6fc129 JH |
953 | update = true; |
954 | speculative_id++; | |
955 | } | |
95d81ba5 | 956 | } |
bf6fc129 | 957 | else |
95d81ba5 | 958 | { |
95d81ba5 JH |
959 | if (dump_file) |
960 | fprintf (dump_file, | |
bf6fc129 JH |
961 | "Function with profile-id %i not found.\n", |
962 | item.target_id); | |
963 | nunknown++; | |
08f835dc JH |
964 | } |
965 | } | |
08f835dc | 966 | } |
bf6fc129 JH |
967 | } |
968 | if (update) | |
969 | ipa_update_overall_fn_summary (n); | |
970 | } | |
08f835dc JH |
971 | if (node_map_initialized) |
972 | del_node_map (); | |
973 | if (dump_file && nindirect) | |
974 | fprintf (dump_file, | |
975 | "%i indirect calls trained.\n" | |
976 | "%i (%3.2f%%) have common target.\n" | |
977 | "%i (%3.2f%%) targets was not found.\n" | |
95d81ba5 JH |
978 | "%i (%3.2f%%) targets had parameter count mismatch.\n" |
979 | "%i (%3.2f%%) targets was not in polymorphic call target list.\n" | |
08f835dc JH |
980 | "%i (%3.2f%%) speculations seems useless.\n" |
981 | "%i (%3.2f%%) speculations produced.\n", | |
982 | nindirect, | |
983 | ncommon, ncommon * 100.0 / nindirect, | |
984 | nunknown, nunknown * 100.0 / nindirect, | |
95d81ba5 JH |
985 | nmismatch, nmismatch * 100.0 / nindirect, |
986 | nimpossible, nimpossible * 100.0 / nindirect, | |
08f835dc JH |
987 | nuseless, nuseless * 100.0 / nindirect, |
988 | nconverted, nconverted * 100.0 / nindirect); | |
989 | ||
3dafb85c | 990 | order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
08f835dc JH |
991 | order_pos = ipa_reverse_postorder (order); |
992 | for (i = order_pos - 1; i >= 0; i--) | |
993 | { | |
87f94429 | 994 | if (order[i]->local |
1ede94c5 JH |
995 | && opt_for_fn (order[i]->decl, flag_ipa_profile) |
996 | && ipa_propagate_frequency (order[i])) | |
08f835dc JH |
997 | { |
998 | for (e = order[i]->callees; e; e = e->next_callee) | |
87f94429 | 999 | if (e->callee->local && !e->callee->aux) |
08f835dc JH |
1000 | { |
1001 | something_changed = true; | |
67348ccc | 1002 | e->callee->aux = (void *)1; |
08f835dc JH |
1003 | } |
1004 | } | |
67348ccc | 1005 | order[i]->aux = NULL; |
08f835dc JH |
1006 | } |
1007 | ||
1008 | while (something_changed) | |
1009 | { | |
1010 | something_changed = false; | |
1011 | for (i = order_pos - 1; i >= 0; i--) | |
1012 | { | |
1ede94c5 JH |
1013 | if (order[i]->aux |
1014 | && opt_for_fn (order[i]->decl, flag_ipa_profile) | |
1015 | && ipa_propagate_frequency (order[i])) | |
08f835dc JH |
1016 | { |
1017 | for (e = order[i]->callees; e; e = e->next_callee) | |
87f94429 | 1018 | if (e->callee->local && !e->callee->aux) |
08f835dc JH |
1019 | { |
1020 | something_changed = true; | |
67348ccc | 1021 | e->callee->aux = (void *)1; |
08f835dc JH |
1022 | } |
1023 | } | |
67348ccc | 1024 | order[i]->aux = NULL; |
08f835dc JH |
1025 | } |
1026 | } | |
1027 | free (order); | |
f1ba88b1 XHL |
1028 | |
1029 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1030 | symtab->dump (dump_file); | |
1031 | ||
65be83b5 DM |
1032 | delete call_sums; |
1033 | call_sums = NULL; | |
1034 | ||
08f835dc JH |
1035 | return 0; |
1036 | } | |
1037 | ||
08f835dc JH |
1038 | namespace { |
1039 | ||
1040 | const pass_data pass_data_ipa_profile = | |
1041 | { | |
1042 | IPA_PASS, /* type */ | |
1043 | "profile_estimate", /* name */ | |
1044 | OPTGROUP_NONE, /* optinfo_flags */ | |
08f835dc JH |
1045 | TV_IPA_PROFILE, /* tv_id */ |
1046 | 0, /* properties_required */ | |
1047 | 0, /* properties_provided */ | |
1048 | 0, /* properties_destroyed */ | |
1049 | 0, /* todo_flags_start */ | |
1050 | 0, /* todo_flags_finish */ | |
1051 | }; | |
1052 | ||
1053 | class pass_ipa_profile : public ipa_opt_pass_d | |
1054 | { | |
1055 | public: | |
c3284718 RS |
1056 | pass_ipa_profile (gcc::context *ctxt) |
1057 | : ipa_opt_pass_d (pass_data_ipa_profile, ctxt, | |
1058 | ipa_profile_generate_summary, /* generate_summary */ | |
1059 | ipa_profile_write_summary, /* write_summary */ | |
1060 | ipa_profile_read_summary, /* read_summary */ | |
1061 | NULL, /* write_optimization_summary */ | |
1062 | NULL, /* read_optimization_summary */ | |
1063 | NULL, /* stmt_fixup */ | |
1064 | 0, /* function_transform_todo_flags_start */ | |
1065 | NULL, /* function_transform */ | |
1066 | NULL) /* variable_transform */ | |
08f835dc JH |
1067 | {} |
1068 | ||
1069 | /* opt_pass methods: */ | |
2bf86c84 | 1070 | virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; } |
be55bfe6 | 1071 | virtual unsigned int execute (function *) { return ipa_profile (); } |
08f835dc JH |
1072 | |
1073 | }; // class pass_ipa_profile | |
1074 | ||
1075 | } // anon namespace | |
1076 | ||
1077 | ipa_opt_pass_d * | |
1078 | make_pass_ipa_profile (gcc::context *ctxt) | |
1079 | { | |
1080 | return new pass_ipa_profile (ctxt); | |
1081 | } |