]>
Commit | Line | Data |
---|---|---|
08f835dc | 1 | /* Basic IPA optimizations based on profile. |
a5544970 | 2 | Copyright (C) 2003-2019 Free Software Foundation, Inc. |
08f835dc JH |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
daf5c770 JH |
20 | /* ipa-profile pass implements the following analysis propagating profille |
21 | inter-procedurally. | |
22 | ||
23 | - Count histogram construction. This is a histogram analyzing how much | |
24 | time is spent executing statements with a given execution count read | |
1c5fd343 | 25 | from profile feedback. This histogram is complete only with LTO, |
daf5c770 JH |
26 | otherwise it contains information only about the current unit. |
27 | ||
daf5c770 JH |
28 | The information is used to set hot/cold thresholds. |
29 | - Next speculative indirect call resolution is performed: the local | |
30 | profile pass assigns profile-id to each function and provide us with a | |
31 | histogram specifying the most common target. We look up the callgraph | |
32 | node corresponding to the target and produce a speculative call. | |
33 | ||
34 | This call may or may not survive through IPA optimization based on decision | |
35 | of inliner. | |
36 | - Finally we propagate the following flags: unlikely executed, executed | |
37 | once, executed at startup and executed at exit. These flags are used to | |
026c3cfd | 38 | control code size/performance threshold and code placement (by producing |
daf5c770 | 39 | .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */ |
08f835dc JH |
40 | #include "config.h" |
41 | #include "system.h" | |
42 | #include "coretypes.h" | |
c7131fb2 | 43 | #include "backend.h" |
4d648807 | 44 | #include "tree.h" |
c7131fb2 | 45 | #include "gimple.h" |
957060b5 AM |
46 | #include "predict.h" |
47 | #include "alloc-pool.h" | |
48 | #include "tree-pass.h" | |
49 | #include "cgraph.h" | |
50 | #include "data-streamer.h" | |
5be5c238 | 51 | #include "gimple-iterator.h" |
08f835dc | 52 | #include "ipa-utils.h" |
08f835dc JH |
53 | #include "profile.h" |
54 | #include "params.h" | |
55 | #include "value-prof.h" | |
08f835dc | 56 | #include "tree-inline.h" |
dd912cb8 | 57 | #include "symbol-summary.h" |
8bc5448f | 58 | #include "tree-vrp.h" |
c582198b | 59 | #include "ipa-prop.h" |
27d020cf | 60 | #include "ipa-fnsummary.h" |
08f835dc JH |
61 | |
62 | /* Entry in the histogram. */ | |
63 | ||
64 | struct histogram_entry | |
65 | { | |
66 | gcov_type count; | |
67 | int time; | |
68 | int size; | |
69 | }; | |
70 | ||
71 | /* Histogram of profile values. | |
72 | The histogram is represented as an ordered vector of entries allocated via | |
73 | histogram_pool. During construction a separate hashtable is kept to lookup | |
74 | duplicate entries. */ | |
75 | ||
76 | vec<histogram_entry *> histogram; | |
fcb87c50 | 77 | static object_allocator<histogram_entry> histogram_pool ("IPA histogram"); |
08f835dc JH |
78 | |
79 | /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ | |
80 | ||
8d67ee55 | 81 | struct histogram_hash : nofree_ptr_hash <histogram_entry> |
08f835dc | 82 | { |
67f58944 TS |
83 | static inline hashval_t hash (const histogram_entry *); |
84 | static inline int equal (const histogram_entry *, const histogram_entry *); | |
08f835dc JH |
85 | }; |
86 | ||
87 | inline hashval_t | |
88 | histogram_hash::hash (const histogram_entry *val) | |
89 | { | |
90 | return val->count; | |
91 | } | |
92 | ||
93 | inline int | |
94 | histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) | |
95 | { | |
96 | return val->count == val2->count; | |
97 | } | |
98 | ||
99 | /* Account TIME and SIZE executed COUNT times into HISTOGRAM. | |
100 | HASHTABLE is the on-side hash kept to avoid duplicates. */ | |
101 | ||
102 | static void | |
c203e8a7 | 103 | account_time_size (hash_table<histogram_hash> *hashtable, |
08f835dc JH |
104 | vec<histogram_entry *> &histogram, |
105 | gcov_type count, int time, int size) | |
106 | { | |
107 | histogram_entry key = {count, 0, 0}; | |
c203e8a7 | 108 | histogram_entry **val = hashtable->find_slot (&key, INSERT); |
08f835dc JH |
109 | |
110 | if (!*val) | |
111 | { | |
d7809518 | 112 | *val = histogram_pool.allocate (); |
08f835dc JH |
113 | **val = key; |
114 | histogram.safe_push (*val); | |
115 | } | |
116 | (*val)->time += time; | |
117 | (*val)->size += size; | |
118 | } | |
119 | ||
120 | int | |
121 | cmp_counts (const void *v1, const void *v2) | |
122 | { | |
123 | const histogram_entry *h1 = *(const histogram_entry * const *)v1; | |
124 | const histogram_entry *h2 = *(const histogram_entry * const *)v2; | |
125 | if (h1->count < h2->count) | |
126 | return 1; | |
127 | if (h1->count > h2->count) | |
128 | return -1; | |
129 | return 0; | |
130 | } | |
131 | ||
132 | /* Dump HISTOGRAM to FILE. */ | |
133 | ||
134 | static void | |
135 | dump_histogram (FILE *file, vec<histogram_entry *> histogram) | |
136 | { | |
137 | unsigned int i; | |
138 | gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0; | |
139 | ||
140 | fprintf (dump_file, "Histogram:\n"); | |
141 | for (i = 0; i < histogram.length (); i++) | |
142 | { | |
143 | overall_time += histogram[i]->count * histogram[i]->time; | |
144 | overall_size += histogram[i]->size; | |
145 | } | |
146 | if (!overall_time) | |
147 | overall_time = 1; | |
148 | if (!overall_size) | |
149 | overall_size = 1; | |
150 | for (i = 0; i < histogram.length (); i++) | |
151 | { | |
152 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
153 | cumulated_size += histogram[i]->size; | |
16998094 | 154 | fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n", |
a9243bfc | 155 | (int64_t) histogram[i]->count, |
08f835dc JH |
156 | histogram[i]->time, |
157 | cumulated_time * 100.0 / overall_time, | |
158 | histogram[i]->size, | |
159 | cumulated_size * 100.0 / overall_size); | |
160 | } | |
161 | } | |
162 | ||
163 | /* Collect histogram from CFG profiles. */ | |
164 | ||
165 | static void | |
166 | ipa_profile_generate_summary (void) | |
167 | { | |
168 | struct cgraph_node *node; | |
169 | gimple_stmt_iterator gsi; | |
08f835dc JH |
170 | basic_block bb; |
171 | ||
c203e8a7 | 172 | hash_table<histogram_hash> hashtable (10); |
08f835dc JH |
173 | |
174 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) | |
e7a74006 JH |
175 | if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) |
176 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) | |
177 | { | |
178 | int time = 0; | |
179 | int size = 0; | |
180 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
181 | { | |
182 | gimple *stmt = gsi_stmt (gsi); | |
183 | if (gimple_code (stmt) == GIMPLE_CALL | |
184 | && !gimple_call_fndecl (stmt)) | |
185 | { | |
186 | histogram_value h; | |
187 | h = gimple_histogram_value_of_type | |
188 | (DECL_STRUCT_FUNCTION (node->decl), | |
189 | stmt, HIST_TYPE_INDIR_CALL); | |
190 | /* No need to do sanity check: gimple_ic_transform already | |
191 | takes away bad histograms. */ | |
192 | if (h) | |
193 | { | |
194 | /* counter 0 is target, counter 1 is number of execution we called target, | |
195 | counter 2 is total number of executions. */ | |
196 | if (h->hvalue.counters[2]) | |
197 | { | |
198 | struct cgraph_edge * e = node->get_edge (stmt); | |
199 | if (e && !e->indirect_unknown_callee) | |
200 | continue; | |
201 | e->indirect_info->common_target_id | |
202 | = h->hvalue.counters [0]; | |
203 | e->indirect_info->common_target_probability | |
204 | = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); | |
205 | if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) | |
206 | { | |
207 | if (dump_file) | |
208 | fprintf (dump_file, "Probability capped to 1\n"); | |
209 | e->indirect_info->common_target_probability = REG_BR_PROB_BASE; | |
210 | } | |
211 | } | |
212 | gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), | |
213 | stmt, h); | |
214 | } | |
215 | } | |
216 | time += estimate_num_insns (stmt, &eni_time_weights); | |
217 | size += estimate_num_insns (stmt, &eni_size_weights); | |
218 | } | |
219 | if (bb->count.ipa_p () && bb->count.initialized_p ()) | |
220 | account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (), | |
221 | time, size); | |
222 | } | |
08f835dc JH |
223 | histogram.qsort (cmp_counts); |
224 | } | |
225 | ||
226 | /* Serialize the ipa info for lto. */ | |
227 | ||
228 | static void | |
229 | ipa_profile_write_summary (void) | |
230 | { | |
231 | struct lto_simple_output_block *ob | |
232 | = lto_create_simple_output_block (LTO_section_ipa_profile); | |
233 | unsigned int i; | |
234 | ||
c3284718 | 235 | streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); |
08f835dc JH |
236 | for (i = 0; i < histogram.length (); i++) |
237 | { | |
238 | streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); | |
239 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); | |
240 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); | |
241 | } | |
242 | lto_destroy_simple_output_block (ob); | |
243 | } | |
244 | ||
245 | /* Deserialize the ipa info for lto. */ | |
246 | ||
247 | static void | |
248 | ipa_profile_read_summary (void) | |
249 | { | |
250 | struct lto_file_decl_data ** file_data_vec | |
251 | = lto_get_file_decl_data (); | |
252 | struct lto_file_decl_data * file_data; | |
08f835dc JH |
253 | int j = 0; |
254 | ||
c203e8a7 | 255 | hash_table<histogram_hash> hashtable (10); |
08f835dc JH |
256 | |
257 | while ((file_data = file_data_vec[j++])) | |
258 | { | |
259 | const char *data; | |
260 | size_t len; | |
261 | struct lto_input_block *ib | |
262 | = lto_create_simple_input_block (file_data, | |
263 | LTO_section_ipa_profile, | |
264 | &data, &len); | |
265 | if (ib) | |
266 | { | |
267 | unsigned int num = streamer_read_uhwi (ib); | |
268 | unsigned int n; | |
269 | for (n = 0; n < num; n++) | |
270 | { | |
271 | gcov_type count = streamer_read_gcov_count (ib); | |
272 | int time = streamer_read_uhwi (ib); | |
273 | int size = streamer_read_uhwi (ib); | |
c203e8a7 | 274 | account_time_size (&hashtable, histogram, |
08f835dc JH |
275 | count, time, size); |
276 | } | |
277 | lto_destroy_simple_input_block (file_data, | |
278 | LTO_section_ipa_profile, | |
279 | ib, data, len); | |
280 | } | |
281 | } | |
08f835dc JH |
282 | histogram.qsort (cmp_counts); |
283 | } | |
284 | ||
285 | /* Data used by ipa_propagate_frequency. */ | |
286 | ||
287 | struct ipa_propagate_frequency_data | |
288 | { | |
1ede94c5 | 289 | cgraph_node *function_symbol; |
08f835dc JH |
290 | bool maybe_unlikely_executed; |
291 | bool maybe_executed_once; | |
292 | bool only_called_at_startup; | |
293 | bool only_called_at_exit; | |
294 | }; | |
295 | ||
296 | /* Worker for ipa_propagate_frequency_1. */ | |
297 | ||
298 | static bool | |
299 | ipa_propagate_frequency_1 (struct cgraph_node *node, void *data) | |
300 | { | |
301 | struct ipa_propagate_frequency_data *d; | |
302 | struct cgraph_edge *edge; | |
303 | ||
304 | d = (struct ipa_propagate_frequency_data *)data; | |
305 | for (edge = node->callers; | |
306 | edge && (d->maybe_unlikely_executed || d->maybe_executed_once | |
307 | || d->only_called_at_startup || d->only_called_at_exit); | |
308 | edge = edge->next_caller) | |
309 | { | |
1ede94c5 | 310 | if (edge->caller != d->function_symbol) |
08f835dc JH |
311 | { |
312 | d->only_called_at_startup &= edge->caller->only_called_at_startup; | |
313 | /* It makes sense to put main() together with the static constructors. | |
314 | It will be executed for sure, but rest of functions called from | |
315 | main are definitely not at startup only. */ | |
67348ccc | 316 | if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) |
08f835dc JH |
317 | d->only_called_at_startup = 0; |
318 | d->only_called_at_exit &= edge->caller->only_called_at_exit; | |
319 | } | |
daf5c770 JH |
320 | |
321 | /* When profile feedback is available, do not try to propagate too hard; | |
322 | counts are already good guide on function frequencies and roundoff | |
323 | errors can make us to push function into unlikely section even when | |
324 | it is executed by the train run. Transfer the function only if all | |
325 | callers are unlikely executed. */ | |
1ede94c5 | 326 | if (profile_info |
35d93d1d | 327 | && !(edge->callee->count.ipa () == profile_count::zero ()) |
daf5c770 JH |
328 | && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED |
329 | || (edge->caller->global.inlined_to | |
330 | && edge->caller->global.inlined_to->frequency | |
331 | != NODE_FREQUENCY_UNLIKELY_EXECUTED))) | |
332 | d->maybe_unlikely_executed = false; | |
35d93d1d JH |
333 | if (edge->count.ipa ().initialized_p () |
334 | && !edge->count.ipa ().nonzero_p ()) | |
08f835dc JH |
335 | continue; |
336 | switch (edge->caller->frequency) | |
337 | { | |
338 | case NODE_FREQUENCY_UNLIKELY_EXECUTED: | |
339 | break; | |
340 | case NODE_FREQUENCY_EXECUTED_ONCE: | |
56f62793 ML |
341 | { |
342 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
343 | fprintf (dump_file, " Called by %s that is executed once\n", | |
344 | edge->caller->name ()); | |
345 | d->maybe_unlikely_executed = false; | |
346 | ipa_call_summary *s = ipa_call_summaries->get (edge); | |
347 | if (s != NULL && s->loop_depth) | |
348 | { | |
349 | d->maybe_executed_once = false; | |
350 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
351 | fprintf (dump_file, " Called in loop\n"); | |
352 | } | |
353 | break; | |
354 | } | |
08f835dc JH |
355 | case NODE_FREQUENCY_HOT: |
356 | case NODE_FREQUENCY_NORMAL: | |
357 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
358 | fprintf (dump_file, " Called by %s that is normal or hot\n", | |
fec39fa6 | 359 | edge->caller->name ()); |
08f835dc JH |
360 | d->maybe_unlikely_executed = false; |
361 | d->maybe_executed_once = false; | |
362 | break; | |
363 | } | |
364 | } | |
365 | return edge != NULL; | |
366 | } | |
367 | ||
daf5c770 JH |
368 | /* Return ture if NODE contains hot calls. */ |
369 | ||
370 | bool | |
371 | contains_hot_call_p (struct cgraph_node *node) | |
372 | { | |
373 | struct cgraph_edge *e; | |
374 | for (e = node->callees; e; e = e->next_callee) | |
3dafb85c | 375 | if (e->maybe_hot_p ()) |
daf5c770 JH |
376 | return true; |
377 | else if (!e->inline_failed | |
378 | && contains_hot_call_p (e->callee)) | |
379 | return true; | |
380 | for (e = node->indirect_calls; e; e = e->next_callee) | |
3dafb85c | 381 | if (e->maybe_hot_p ()) |
daf5c770 JH |
382 | return true; |
383 | return false; | |
384 | } | |
385 | ||
08f835dc JH |
386 | /* See if the frequency of NODE can be updated based on frequencies of its |
387 | callers. */ | |
388 | bool | |
389 | ipa_propagate_frequency (struct cgraph_node *node) | |
390 | { | |
1ede94c5 | 391 | struct ipa_propagate_frequency_data d = {node, true, true, true, true}; |
08f835dc JH |
392 | bool changed = false; |
393 | ||
67914693 | 394 | /* We cannot propagate anything useful about externally visible functions |
08f835dc JH |
395 | nor about virtuals. */ |
396 | if (!node->local.local | |
67348ccc | 397 | || node->alias |
2bf86c84 JH |
398 | || (opt_for_fn (node->decl, flag_devirtualize) |
399 | && DECL_VIRTUAL_P (node->decl))) | |
08f835dc | 400 | return false; |
67348ccc | 401 | gcc_assert (node->analyzed); |
08f835dc | 402 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fec39fa6 | 403 | fprintf (dump_file, "Processing frequency %s\n", node->name ()); |
08f835dc | 404 | |
1ede94c5 JH |
405 | node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d, |
406 | true); | |
08f835dc JH |
407 | |
408 | if ((d.only_called_at_startup && !d.only_called_at_exit) | |
409 | && !node->only_called_at_startup) | |
410 | { | |
411 | node->only_called_at_startup = true; | |
412 | if (dump_file) | |
413 | fprintf (dump_file, "Node %s promoted to only called at startup.\n", | |
fec39fa6 | 414 | node->name ()); |
08f835dc JH |
415 | changed = true; |
416 | } | |
417 | if ((d.only_called_at_exit && !d.only_called_at_startup) | |
418 | && !node->only_called_at_exit) | |
419 | { | |
420 | node->only_called_at_exit = true; | |
421 | if (dump_file) | |
422 | fprintf (dump_file, "Node %s promoted to only called at exit.\n", | |
fec39fa6 | 423 | node->name ()); |
08f835dc JH |
424 | changed = true; |
425 | } | |
daf5c770 JH |
426 | |
427 | /* With profile we can decide on hot/normal based on count. */ | |
1bad9c18 | 428 | if (node->count. ipa().initialized_p ()) |
daf5c770 JH |
429 | { |
430 | bool hot = false; | |
1bad9c18 JH |
431 | if (!(node->count. ipa() == profile_count::zero ()) |
432 | && node->count. ipa() >= get_hot_bb_threshold ()) | |
daf5c770 JH |
433 | hot = true; |
434 | if (!hot) | |
435 | hot |= contains_hot_call_p (node); | |
436 | if (hot) | |
437 | { | |
438 | if (node->frequency != NODE_FREQUENCY_HOT) | |
439 | { | |
440 | if (dump_file) | |
441 | fprintf (dump_file, "Node %s promoted to hot.\n", | |
fec39fa6 | 442 | node->name ()); |
daf5c770 JH |
443 | node->frequency = NODE_FREQUENCY_HOT; |
444 | return true; | |
445 | } | |
446 | return false; | |
447 | } | |
448 | else if (node->frequency == NODE_FREQUENCY_HOT) | |
449 | { | |
450 | if (dump_file) | |
451 | fprintf (dump_file, "Node %s reduced to normal.\n", | |
fec39fa6 | 452 | node->name ()); |
daf5c770 JH |
453 | node->frequency = NODE_FREQUENCY_NORMAL; |
454 | changed = true; | |
455 | } | |
456 | } | |
08f835dc JH |
457 | /* These come either from profile or user hints; never update them. */ |
458 | if (node->frequency == NODE_FREQUENCY_HOT | |
459 | || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
460 | return changed; | |
461 | if (d.maybe_unlikely_executed) | |
462 | { | |
463 | node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
464 | if (dump_file) | |
465 | fprintf (dump_file, "Node %s promoted to unlikely executed.\n", | |
fec39fa6 | 466 | node->name ()); |
08f835dc JH |
467 | changed = true; |
468 | } | |
469 | else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) | |
470 | { | |
471 | node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | |
472 | if (dump_file) | |
473 | fprintf (dump_file, "Node %s promoted to executed once.\n", | |
fec39fa6 | 474 | node->name ()); |
08f835dc JH |
475 | changed = true; |
476 | } | |
477 | return changed; | |
478 | } | |
479 | ||
480 | /* Simple ipa profile pass propagating frequencies across the callgraph. */ | |
481 | ||
482 | static unsigned int | |
483 | ipa_profile (void) | |
484 | { | |
485 | struct cgraph_node **order; | |
486 | struct cgraph_edge *e; | |
487 | int order_pos; | |
488 | bool something_changed = false; | |
489 | int i; | |
490 | gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; | |
491 | struct cgraph_node *n,*n2; | |
492 | int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0; | |
95d81ba5 | 493 | int nmismatch = 0, nimpossible = 0; |
08f835dc JH |
494 | bool node_map_initialized = false; |
495 | ||
496 | if (dump_file) | |
497 | dump_histogram (dump_file, histogram); | |
498 | for (i = 0; i < (int)histogram.length (); i++) | |
499 | { | |
500 | overall_time += histogram[i]->count * histogram[i]->time; | |
501 | overall_size += histogram[i]->size; | |
502 | } | |
503 | if (overall_time) | |
504 | { | |
505 | gcov_type threshold; | |
506 | ||
507 | gcc_assert (overall_size); | |
08f835dc | 508 | |
08f835dc JH |
509 | cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; |
510 | threshold = 0; | |
511 | for (i = 0; cumulated < cutoff; i++) | |
512 | { | |
513 | cumulated += histogram[i]->count * histogram[i]->time; | |
514 | threshold = histogram[i]->count; | |
515 | } | |
516 | if (!threshold) | |
517 | threshold = 1; | |
518 | if (dump_file) | |
519 | { | |
520 | gcov_type cumulated_time = 0, cumulated_size = 0; | |
521 | ||
522 | for (i = 0; | |
523 | i < (int)histogram.length () && histogram[i]->count >= threshold; | |
524 | i++) | |
525 | { | |
526 | cumulated_time += histogram[i]->count * histogram[i]->time; | |
527 | cumulated_size += histogram[i]->size; | |
528 | } | |
16998094 | 529 | fprintf (dump_file, "Determined min count: %" PRId64 |
08f835dc | 530 | " Time:%3.2f%% Size:%3.2f%%\n", |
a9243bfc | 531 | (int64_t)threshold, |
08f835dc JH |
532 | cumulated_time * 100.0 / overall_time, |
533 | cumulated_size * 100.0 / overall_size); | |
534 | } | |
512cc015 | 535 | |
e53f77c6 | 536 | if (in_lto_p) |
08f835dc JH |
537 | { |
538 | if (dump_file) | |
e53f77c6 | 539 | fprintf (dump_file, "Setting hotness threshold in LTO mode.\n"); |
08f835dc JH |
540 | set_hot_bb_threshold (threshold); |
541 | } | |
542 | } | |
c3284718 | 543 | histogram.release (); |
d7809518 | 544 | histogram_pool.release (); |
08f835dc JH |
545 | |
546 | /* Produce speculative calls: we saved common traget from porfiling into | |
547 | e->common_target_id. Now, at link time, we can look up corresponding | |
548 | function node and produce speculative call. */ | |
549 | ||
550 | FOR_EACH_DEFINED_FUNCTION (n) | |
551 | { | |
552 | bool update = false; | |
553 | ||
1ede94c5 JH |
554 | if (!opt_for_fn (n->decl, flag_ipa_profile)) |
555 | continue; | |
556 | ||
08f835dc JH |
557 | for (e = n->indirect_calls; e; e = e->next_callee) |
558 | { | |
3995f3a2 | 559 | if (n->count.initialized_p ()) |
08f835dc JH |
560 | nindirect++; |
561 | if (e->indirect_info->common_target_id) | |
562 | { | |
563 | if (!node_map_initialized) | |
564 | init_node_map (false); | |
565 | node_map_initialized = true; | |
566 | ncommon++; | |
567 | n2 = find_func_by_profile_id (e->indirect_info->common_target_id); | |
568 | if (n2) | |
569 | { | |
570 | if (dump_file) | |
571 | { | |
572 | fprintf (dump_file, "Indirect call -> direct call from" | |
464d0118 ML |
573 | " other module %s => %s, prob %3.2f\n", |
574 | n->dump_name (), | |
575 | n2->dump_name (), | |
08f835dc JH |
576 | e->indirect_info->common_target_probability |
577 | / (float)REG_BR_PROB_BASE); | |
578 | } | |
579 | if (e->indirect_info->common_target_probability | |
580 | < REG_BR_PROB_BASE / 2) | |
581 | { | |
582 | nuseless++; | |
583 | if (dump_file) | |
584 | fprintf (dump_file, | |
585 | "Not speculating: probability is too low.\n"); | |
586 | } | |
3dafb85c | 587 | else if (!e->maybe_hot_p ()) |
08f835dc JH |
588 | { |
589 | nuseless++; | |
590 | if (dump_file) | |
591 | fprintf (dump_file, | |
592 | "Not speculating: call is cold.\n"); | |
593 | } | |
d52f5295 ML |
594 | else if (n2->get_availability () <= AVAIL_INTERPOSABLE |
595 | && n2->can_be_discarded_p ()) | |
08f835dc JH |
596 | { |
597 | nuseless++; | |
598 | if (dump_file) | |
599 | fprintf (dump_file, | |
600 | "Not speculating: target is overwritable " | |
601 | "and can be discarded.\n"); | |
602 | } | |
6fe906a3 | 603 | else if (ipa_node_params_sum && ipa_edge_args_sum |
f65f1ae3 MJ |
604 | && (!vec_safe_is_empty |
605 | (IPA_NODE_REF (n2)->descriptors)) | |
95d81ba5 JH |
606 | && ipa_get_param_count (IPA_NODE_REF (n2)) |
607 | != ipa_get_cs_argument_count (IPA_EDGE_REF (e)) | |
608 | && (ipa_get_param_count (IPA_NODE_REF (n2)) | |
609 | >= ipa_get_cs_argument_count (IPA_EDGE_REF (e)) | |
610 | || !stdarg_p (TREE_TYPE (n2->decl)))) | |
611 | { | |
612 | nmismatch++; | |
613 | if (dump_file) | |
614 | fprintf (dump_file, | |
615 | "Not speculating: " | |
616 | "parameter count mistmatch\n"); | |
617 | } | |
618 | else if (e->indirect_info->polymorphic | |
619 | && !opt_for_fn (n->decl, flag_devirtualize) | |
620 | && !possible_polymorphic_call_target_p (e, n2)) | |
621 | { | |
622 | nimpossible++; | |
623 | if (dump_file) | |
624 | fprintf (dump_file, | |
625 | "Not speculating: " | |
626 | "function is not in the polymorphic " | |
627 | "call target list\n"); | |
628 | } | |
08f835dc JH |
629 | else |
630 | { | |
631 | /* Target may be overwritable, but profile says that | |
632 | control flow goes to this particular implementation | |
633 | of N2. Speculate on the local alias to allow inlining. | |
634 | */ | |
d52f5295 | 635 | if (!n2->can_be_discarded_p ()) |
5b79657a JH |
636 | { |
637 | cgraph_node *alias; | |
d52f5295 | 638 | alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ()); |
5b79657a JH |
639 | if (alias) |
640 | n2 = alias; | |
641 | } | |
08f835dc | 642 | nconverted++; |
3dafb85c ML |
643 | e->make_speculative |
644 | (n2, | |
3995f3a2 | 645 | e->count.apply_probability |
1bad9c18 | 646 | (e->indirect_info->common_target_probability)); |
08f835dc JH |
647 | update = true; |
648 | } | |
649 | } | |
650 | else | |
651 | { | |
652 | if (dump_file) | |
653 | fprintf (dump_file, "Function with profile-id %i not found.\n", | |
654 | e->indirect_info->common_target_id); | |
655 | nunknown++; | |
656 | } | |
657 | } | |
658 | } | |
659 | if (update) | |
0bceb671 | 660 | ipa_update_overall_fn_summary (n); |
08f835dc JH |
661 | } |
662 | if (node_map_initialized) | |
663 | del_node_map (); | |
664 | if (dump_file && nindirect) | |
665 | fprintf (dump_file, | |
666 | "%i indirect calls trained.\n" | |
667 | "%i (%3.2f%%) have common target.\n" | |
668 | "%i (%3.2f%%) targets was not found.\n" | |
95d81ba5 JH |
669 | "%i (%3.2f%%) targets had parameter count mismatch.\n" |
670 | "%i (%3.2f%%) targets was not in polymorphic call target list.\n" | |
08f835dc JH |
671 | "%i (%3.2f%%) speculations seems useless.\n" |
672 | "%i (%3.2f%%) speculations produced.\n", | |
673 | nindirect, | |
674 | ncommon, ncommon * 100.0 / nindirect, | |
675 | nunknown, nunknown * 100.0 / nindirect, | |
95d81ba5 JH |
676 | nmismatch, nmismatch * 100.0 / nindirect, |
677 | nimpossible, nimpossible * 100.0 / nindirect, | |
08f835dc JH |
678 | nuseless, nuseless * 100.0 / nindirect, |
679 | nconverted, nconverted * 100.0 / nindirect); | |
680 | ||
3dafb85c | 681 | order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
08f835dc JH |
682 | order_pos = ipa_reverse_postorder (order); |
683 | for (i = order_pos - 1; i >= 0; i--) | |
684 | { | |
1ede94c5 JH |
685 | if (order[i]->local.local |
686 | && opt_for_fn (order[i]->decl, flag_ipa_profile) | |
687 | && ipa_propagate_frequency (order[i])) | |
08f835dc JH |
688 | { |
689 | for (e = order[i]->callees; e; e = e->next_callee) | |
67348ccc | 690 | if (e->callee->local.local && !e->callee->aux) |
08f835dc JH |
691 | { |
692 | something_changed = true; | |
67348ccc | 693 | e->callee->aux = (void *)1; |
08f835dc JH |
694 | } |
695 | } | |
67348ccc | 696 | order[i]->aux = NULL; |
08f835dc JH |
697 | } |
698 | ||
699 | while (something_changed) | |
700 | { | |
701 | something_changed = false; | |
702 | for (i = order_pos - 1; i >= 0; i--) | |
703 | { | |
1ede94c5 JH |
704 | if (order[i]->aux |
705 | && opt_for_fn (order[i]->decl, flag_ipa_profile) | |
706 | && ipa_propagate_frequency (order[i])) | |
08f835dc JH |
707 | { |
708 | for (e = order[i]->callees; e; e = e->next_callee) | |
67348ccc | 709 | if (e->callee->local.local && !e->callee->aux) |
08f835dc JH |
710 | { |
711 | something_changed = true; | |
67348ccc | 712 | e->callee->aux = (void *)1; |
08f835dc JH |
713 | } |
714 | } | |
67348ccc | 715 | order[i]->aux = NULL; |
08f835dc JH |
716 | } |
717 | } | |
718 | free (order); | |
719 | return 0; | |
720 | } | |
721 | ||
08f835dc JH |
722 | namespace { |
723 | ||
724 | const pass_data pass_data_ipa_profile = | |
725 | { | |
726 | IPA_PASS, /* type */ | |
727 | "profile_estimate", /* name */ | |
728 | OPTGROUP_NONE, /* optinfo_flags */ | |
08f835dc JH |
729 | TV_IPA_PROFILE, /* tv_id */ |
730 | 0, /* properties_required */ | |
731 | 0, /* properties_provided */ | |
732 | 0, /* properties_destroyed */ | |
733 | 0, /* todo_flags_start */ | |
734 | 0, /* todo_flags_finish */ | |
735 | }; | |
736 | ||
737 | class pass_ipa_profile : public ipa_opt_pass_d | |
738 | { | |
739 | public: | |
c3284718 RS |
740 | pass_ipa_profile (gcc::context *ctxt) |
741 | : ipa_opt_pass_d (pass_data_ipa_profile, ctxt, | |
742 | ipa_profile_generate_summary, /* generate_summary */ | |
743 | ipa_profile_write_summary, /* write_summary */ | |
744 | ipa_profile_read_summary, /* read_summary */ | |
745 | NULL, /* write_optimization_summary */ | |
746 | NULL, /* read_optimization_summary */ | |
747 | NULL, /* stmt_fixup */ | |
748 | 0, /* function_transform_todo_flags_start */ | |
749 | NULL, /* function_transform */ | |
750 | NULL) /* variable_transform */ | |
08f835dc JH |
751 | {} |
752 | ||
753 | /* opt_pass methods: */ | |
2bf86c84 | 754 | virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; } |
be55bfe6 | 755 | virtual unsigned int execute (function *) { return ipa_profile (); } |
08f835dc JH |
756 | |
757 | }; // class pass_ipa_profile | |
758 | ||
759 | } // anon namespace | |
760 | ||
761 | ipa_opt_pass_d * | |
762 | make_pass_ipa_profile (gcc::context *ctxt) | |
763 | { | |
764 | return new pass_ipa_profile (ctxt); | |
765 | } |